From 7d9254014a5e9a97b00529735cbe3a70f4b419bc Mon Sep 17 00:00:00 2001 From: Qiao Pengcheng Date: Wed, 15 Dec 2021 21:54:36 +0800 Subject: [PATCH 01/46] Part6-1: add the coreclr-jit directory for LoongArch64. (#59561) Co-authored-by: Loongson's .NET-teams --- src/coreclr/jit/ICorJitInfo_API_names.h | 1 + src/coreclr/jit/ICorJitInfo_API_wrapper.hpp | 18 +- src/coreclr/jit/codegen.h | 127 +- src/coreclr/jit/codegencommon.cpp | 1043 ++- src/coreclr/jit/codegeninterface.h | 15 +- src/coreclr/jit/codegenlinear.cpp | 9 +- src/coreclr/jit/codegenloongarch64.cpp | 9157 +++++++++++++++++++ src/coreclr/jit/compiler.cpp | 95 +- src/coreclr/jit/compiler.h | 84 +- src/coreclr/jit/compiler.hpp | 12 +- src/coreclr/jit/ee_il_dll.cpp | 8 + src/coreclr/jit/emit.cpp | 269 +- src/coreclr/jit/emit.h | 192 +- src/coreclr/jit/emitdef.h | 2 + src/coreclr/jit/emitfmts.h | 2 + src/coreclr/jit/emitfmtsloongarch64.h | 82 + src/coreclr/jit/emitinl.h | 56 + src/coreclr/jit/emitjmps.h | 20 + src/coreclr/jit/emitloongarch64.cpp | 6780 ++++++++++++++ src/coreclr/jit/emitloongarch64.h | 421 + src/coreclr/jit/emitpub.h | 4 +- src/coreclr/jit/error.h | 13 +- src/coreclr/jit/gentree.cpp | 104 +- src/coreclr/jit/gentree.h | 5 + src/coreclr/jit/importer.cpp | 178 +- src/coreclr/jit/instr.cpp | 132 + src/coreclr/jit/instr.h | 47 +- src/coreclr/jit/instrs.h | 2 + src/coreclr/jit/instrsloongarch64.h | 499 + src/coreclr/jit/jit.h | 66 +- src/coreclr/jit/jitconfigvalues.h | 17 + src/coreclr/jit/jiteh.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 348 +- src/coreclr/jit/lower.cpp | 111 +- src/coreclr/jit/lower.h | 2 +- src/coreclr/jit/lowerloongarch64.cpp | 1693 ++++ src/coreclr/jit/lsra.cpp | 23 +- src/coreclr/jit/lsra.h | 8 + src/coreclr/jit/lsrabuild.cpp | 9 + src/coreclr/jit/lsraloongarch64.cpp | 1731 ++++ src/coreclr/jit/morph.cpp | 478 +- src/coreclr/jit/optimizer.cpp | 7 + src/coreclr/jit/regalloc.cpp | 23 + src/coreclr/jit/register.h | 3 + src/coreclr/jit/register_arg_convention.cpp | 11 + 
src/coreclr/jit/registerloongarch64.h | 108 + src/coreclr/jit/regset.cpp | 4 +- src/coreclr/jit/regset.h | 2 + src/coreclr/jit/scopeinfo.cpp | 27 +- src/coreclr/jit/target.h | 37 +- src/coreclr/jit/targetloongarch64.cpp | 30 + src/coreclr/jit/targetloongarch64.h | 336 + src/coreclr/jit/unwind.cpp | 6 +- src/coreclr/jit/unwind.h | 17 +- src/coreclr/jit/unwindloongarch64.cpp | 2347 +++++ src/coreclr/jit/utils.cpp | 18 +- src/coreclr/jit/valuenum.cpp | 28 +- src/coreclr/jit/valuenumfuncs.h | 4 + 58 files changed, 26683 insertions(+), 190 deletions(-) create mode 100644 src/coreclr/jit/codegenloongarch64.cpp create mode 100644 src/coreclr/jit/emitfmtsloongarch64.h create mode 100644 src/coreclr/jit/emitloongarch64.cpp create mode 100644 src/coreclr/jit/emitloongarch64.h create mode 100644 src/coreclr/jit/instrsloongarch64.h create mode 100644 src/coreclr/jit/lowerloongarch64.cpp create mode 100644 src/coreclr/jit/lsraloongarch64.cpp create mode 100644 src/coreclr/jit/registerloongarch64.h create mode 100644 src/coreclr/jit/targetloongarch64.cpp create mode 100644 src/coreclr/jit/targetloongarch64.h create mode 100644 src/coreclr/jit/unwindloongarch64.cpp diff --git a/src/coreclr/jit/ICorJitInfo_API_names.h b/src/coreclr/jit/ICorJitInfo_API_names.h index d373091453220..f9597085d11df 100644 --- a/src/coreclr/jit/ICorJitInfo_API_names.h +++ b/src/coreclr/jit/ICorJitInfo_API_names.h @@ -122,6 +122,7 @@ DEF_CLR_API(getMethodNameFromMetadata) DEF_CLR_API(getMethodHash) DEF_CLR_API(findNameOfToken) DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) +DEF_CLR_API(getFieldTypeByHnd) DEF_CLR_API(getThreadTLSIndex) DEF_CLR_API(getInlinedCallFrameVptr) DEF_CLR_API(getAddrOfCaptureThreadGlobal) diff --git a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp index 1e2627ccb3c9a..8326f17cfc7ee 100644 --- a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp +++ b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp @@ -986,14 +986,30 @@ 
CORINFO_ARG_LIST_HANDLE WrapICorJitInfo::getArgNext( CorInfoTypeWithMod WrapICorJitInfo::getArgType( CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args, - CORINFO_CLASS_HANDLE* vcTypeRet) + CORINFO_CLASS_HANDLE* vcTypeRet +#if defined(TARGET_LOONGARCH64) + ,int *flags = NULL +#endif +) { API_ENTER(getArgType); +#if defined(TARGET_LOONGARCH64) + CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet, flags); +#else CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet); +#endif API_LEAVE(getArgType); return temp; } +uint32_t WrapICorJitInfo::getFieldTypeByHnd(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getFieldTypeByHnd); + DWORD temp = wrapHnd->getFieldTypeByHnd(cls); + API_LEAVE(getFieldTypeByHnd); + return temp; +} + CORINFO_CLASS_HANDLE WrapICorJitInfo::getArgClass( CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 343bcb138300b..1960d6ae55320 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -237,7 +237,12 @@ class CodeGen final : public CodeGenInterface void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, BasicBlock* failBlk = nullptr); +#ifdef TARGET_LOONGARCH64 + void genSetRegToIcon(regNumber reg, ssize_t val, var_types type); + void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk = nullptr, regNumber reg2 = REG_R0); +#else void genCheckOverflow(GenTree* tree); +#endif //------------------------------------------------------------------------- // @@ -253,7 +258,11 @@ class CodeGen final : public CodeGenInterface // void genEstablishFramePointer(int delta, bool reportUnwindData); +#if defined(TARGET_LOONGARCH64) + void genFnPrologCalleeRegArgs(); +#else void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState); +#endif void genEnregisterIncomingStackArgs(); void genCheckUseBlockInit(); #if defined(UNIX_AMD64_ABI) && 
defined(FEATURE_SIMD) @@ -319,6 +328,67 @@ class CodeGen final : public CodeGenInterface void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); + +#elif defined(TARGET_LOONGARCH64) + bool genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion = false); + + void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); + + void genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero); + + void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + + void genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero); + + void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); + + // A simple struct to keep register pairs for prolog and epilog. 
+ struct RegPair + { + regNumber reg1; + regNumber reg2; + bool useSaveNextPair; + + RegPair(regNumber reg1) : reg1(reg1), reg2(REG_NA), useSaveNextPair(false) + { + } + + RegPair(regNumber reg1, regNumber reg2) : reg1(reg1), reg2(reg2), useSaveNextPair(false) + { + assert(reg2 == REG_NEXT(reg1)); + } + }; + + static void genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack); + static void genSetUseSaveNextPairs(ArrayStack* regStack); + + static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); + + void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); + + void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); + void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #else void genPushCalleeSavedRegisters(); @@ -400,7 +470,25 @@ class CodeGen final : public CodeGenInterface FuncletFrameInfoDsc genFuncletInfo; -#endif // TARGET_AMD64 +#elif defined(TARGET_LOONGARCH64) + + // A set of information that is used by funclet prolog and epilog generation. + // It is collected once, before funclet prologs and epilogs are generated, + // and used by all funclet prologs and epilogs, which must all be the same. + struct FuncletFrameInfoDsc + { + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) + int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function + // (negative) + int fiSP_to_FPRA_save_delta; // FP/RA register save offset from SP (positive) + int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) + int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) + int fiFrameType; // Funclet frame types are numbered. 
See genFuncletProlog() for details. + int fiSpDelta1; // Stack pointer delta 1 (negative) + }; + + FuncletFrameInfoDsc genFuncletInfo; +#endif // TARGET_LOONGARCH64 #if defined(TARGET_XARCH) @@ -520,6 +608,10 @@ class CodeGen final : public CodeGenInterface void genArm64EmitterUnitTests(); #endif +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + void genLOONGARCH64EmitterUnitTests(); +#endif + #if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) void genAmd64EmitterUnitTests(); #endif @@ -530,6 +622,12 @@ class CodeGen final : public CodeGenInterface bool genSaveFpLrWithAllCalleeSavedRegisters; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value); + virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const; + bool genSaveFpRaWithAllCalleeSavedRegisters; +#endif // TARGET_LOONGARCH64 + //------------------------------------------------------------------------- // // End prolog/epilog generation @@ -835,10 +933,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTree* tree); -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale); void genCodeForMulLong(GenTreeOp* mul); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if !defined(TARGET_64BIT) void genLongToIntCast(GenTree* treeNode); @@ -1231,6 +1329,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genStoreRegToStackArg(var_types type, regNumber reg, int offset); #endif // FEATURE_PUT_STRUCT_ARG_STK +#ifdef TARGET_LOONGARCH64 + //TODO for LOONGARCH64 : maybe delete on LA64? 
+ void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); +#endif + void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); #ifndef TARGET_X86 void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode); @@ -1241,7 +1344,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genTableBasedSwitch(GenTree* tree); void genCodeForArrIndex(GenTreeArrIndex* treeNode); void genCodeForArrOffset(GenTreeArrOffs* treeNode); +#if defined(TARGET_LOONGARCH64) + instruction genGetInsForOper(GenTree* treeNode); +#else instruction genGetInsForOper(genTreeOps oper, var_types type); +#endif bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data); GenTree* getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd); regNumber getCallIndirectionCellReg(const GenTreeCall* call); @@ -1250,7 +1357,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genJmpMethod(GenTree* jmp); BasicBlock* genCallFinally(BasicBlock* block); void genCodeForJumpTrue(GenTreeOp* jtrue); -#ifdef TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) + //TODO: refactor for LA. + void genCodeForJumpCompare(GenTreeOp* tree); +#endif +#if defined(TARGET_ARM64) void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForMadd(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); @@ -1266,6 +1377,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode); void genMultiRegStoreToLocal(GenTreeLclVar* lclNode); +#if defined(TARGET_LOONGARCH64) + void genMultiRegCallStoreToLocal(GenTree* treeNode); +#endif + // Codegen for multi-register struct returns. 
bool isStructReturn(GenTree* treeNode); #ifdef FEATURE_SIMD @@ -1281,9 +1396,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) void genSimpleReturn(GenTree* treeNode); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 void genReturn(GenTree* treeNode); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 237a22c6782f0..b2b2f0d77a9ef 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -138,6 +138,11 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) #ifdef TARGET_ARM64 genSaveFpLrWithAllCalleeSavedRegisters = false; #endif // TARGET_ARM64 + +#ifdef TARGET_LOONGARCH64 + SetHasTailCalls(false); + genSaveFpRaWithAllCalleeSavedRegisters = false; +#endif // TARGET_LOONGARCH64 } void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg) @@ -596,7 +601,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_ASSIGN_BYREF: #if defined(TARGET_AMD64) return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; #elif defined(TARGET_X86) return RBM_ESI | RBM_EDI | RBM_ECX; @@ -605,7 +610,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_CALLEE_TRASH; #endif -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: return RBM_CALLEE_TRASH_WRITEBARRIER; @@ -1101,7 +1106,9 @@ void CodeGen::genAdjustSP(target_ssize_t delta) inst_RV(INS_pop, REG_ECX, TYP_INT); else #endif +#ifndef TARGET_LOONGARCH64 inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE); +#endif } 
//------------------------------------------------------------------------ @@ -1154,7 +1161,7 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block) #endif // !FEATURE_FIXED_OUT_ARGS } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // return size // alignmentWB is out param unsigned CodeGenInterface::InferOpSizeAlign(GenTree* op, unsigned* alignmentWB) @@ -1238,7 +1245,7 @@ unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignme return opSize; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 /***************************************************************************** * @@ -1383,7 +1390,7 @@ bool CodeGen::genCreateAddrMode( cns += op2->AsIntConCommon()->IconValue(); -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (cns == 0) #endif { @@ -1403,7 +1410,7 @@ bool CodeGen::genCreateAddrMode( goto AGAIN; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. case GT_MUL: if (op1->gtOverflow()) @@ -1426,7 +1433,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !defined(TARGET_ARMARCH) +#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) default: break; @@ -1447,7 +1454,7 @@ bool CodeGen::genCreateAddrMode( switch (op1->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index. 
case GT_ADD: @@ -1506,7 +1513,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // !TARGET_ARMARCH +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 case GT_NOP: @@ -1746,6 +1753,67 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0). + if (!pushReg && (compiler->info.compRetType == TYP_REF)) + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. + // We don't have any IR node representing this check, so LSRA can't communicate registers + // for us to use. + + regNumber regGSConst = REG_GSCOOKIE_TMP_0; + regNumber regGSValue = REG_GSCOOKIE_TMP_1; + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + // load the GS cookie constant into a reg + // + genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + } + else + { + //// Ngen case - GS cookie constant needs to be accessed through an indirection. 
+ //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + {////TODO:LoongArch64 should amend for optimize! + //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + } + regSet.verifyRegUsed(regGSConst); + } + // Load this method's GS value from the stack frame + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); + + // Compare with the GC cookie constant + BasicBlock* gsCheckBlk = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); + + // regGSConst and regGSValue aren't needed anymore, we can use them for helper call + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);//no branch-delay! + genDefineTempLabel(gsCheckBlk); +} +#endif // TARGET_LOONGARCH64 + /***************************************************************************** * * Generate an exit sequence for a return from a method (note: when compiling @@ -1856,6 +1924,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi // The code to throw the exception will be generated inline, and // we will jump around it in the normal non-exception case. 
+#ifndef TARGET_LOONGARCH64 BasicBlock* tgtBlk = nullptr; emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); if (reverseJumpKind != jumpKind) @@ -1863,15 +1932,18 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi tgtBlk = genCreateTempLabel(); inst_JMP(reverseJumpKind, tgtBlk); } +#endif genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN); +#ifndef TARGET_LOONGARCH64 // Define the spot for the normal non-exception case to jump to. if (tgtBlk != nullptr) { assert(reverseJumpKind != jumpKind); genDefineTempLabel(tgtBlk); } +#endif } } @@ -1881,6 +1953,7 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi * have set the flags. Check if the operation caused an overflow. */ +#ifndef TARGET_LOONGARCH64 // inline void CodeGen::genCheckOverflow(GenTree* tree) { @@ -1927,6 +2000,7 @@ void CodeGen::genCheckOverflow(GenTree* tree) genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW); } +#endif #if defined(FEATURE_EH_FUNCLETS) @@ -2106,6 +2180,10 @@ void CodeGen::genGenerateMachineCode() { printf("generic ARM64 CPU"); } + else if (compiler->info.genCPU == CPU_LOONGARCH64) + { + printf("generic LOONGARCH64 CPU"); + } else { printf("unknown architecture"); @@ -2305,7 +2383,7 @@ void CodeGen::genEmitMachineCode() bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ? 
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) trackedStackPtrsContig = false; #elif defined(TARGET_ARM) // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous @@ -3224,6 +3302,285 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(push) #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif + +#if defined(TARGET_LOONGARCH64) +void CodeGen::genFnPrologCalleeRegArgs() +{ + assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); + + regMaskTP regArgMaskLive = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; + +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFnPrologCalleeRegArgs() LOONGARCH64:0x%llx.\n", regArgMaskLive); + } +#endif + + // We should be generating the prolog block when we are called + assert(compiler->compGeneratingProlog); + + // We expect to have some registers of the type we are doing, that are LiveIn, otherwise we don't need to be called. + noway_assert(regArgMaskLive != 0); + + unsigned varNum; + unsigned regArgsVars[MAX_REG_ARG*2] = {0}; + unsigned regArgNum = 0; + for (varNum = 0; varNum < compiler->lvaCount; ++varNum) + { + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + // Is this variable a register arg? + if (!varDsc->lvIsParam) + { + continue; + } + + if (!varDsc->lvIsRegArg) + { + continue; + } + + if (varDsc->lvIsInReg()) + { + assert(genIsValidIntReg(varDsc->GetArgReg()) || genIsValidFloatReg(varDsc->GetArgReg())); + assert(!(genIsValidIntReg(varDsc->GetOtherArgReg()) || genIsValidFloatReg(varDsc->GetOtherArgReg()))); + if (varDsc->GetArgInitReg() != varDsc->GetArgReg()) + { + if (varDsc->GetArgInitReg() > REG_ARG_LAST) + { + inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? 
TYP_DOUBLE : TYP_LONG, varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + regArgsVars[regArgNum] = varNum; + regArgNum++; + } + } + else + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); +#ifdef USING_SCOPE_INFO + psiMoveToReg(varNum); +#endif // USING_SCOPE_INFO + if (!varDsc->lvLiveInOutOfHndlr) + continue; + } + + // When we have a promoted struct we have two possible LclVars that can represent the incoming argument + // in the regArgTab[], either the original TYP_STRUCT argument or the introduced lvStructField. + // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field otherwise + // use the the original TYP_STRUCT argument. + // + if (varDsc->lvPromoted || varDsc->lvIsStructField) + { + assert(!"-------------Should confirm on Loongarch!"); + } + + var_types storeType = TYP_UNDEF; + unsigned slotSize = TARGET_POINTER_SIZE; + + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetArgReg())) + { + storeType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + } + else //if (emitter::isGeneralRegister(varDsc->GetArgReg())) + { + assert(emitter::isGeneralRegister(varDsc->GetArgReg())); + if (varDsc->lvIs4Field1) + storeType = TYP_INT; + else + storeType = varDsc->GetLayout()->GetGCPtrType(0); + } + slotSize = (unsigned)emitActualTypeSize(storeType); + +#if FEATURE_MULTIREG_ARGS + // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers + noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES); +#endif + } + else // Not a struct type + { + storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + if (emitter::isFloatReg(varDsc->GetArgReg()) != varTypeIsFloating(storeType)) + { + assert(varTypeIsFloating(storeType)); + storeType = storeType == TYP_DOUBLE ? 
TYP_I_IMPL : TYP_INT; + } + } + emitAttr size = emitActualTypeSize(storeType); + + regNumber srcRegNum = varDsc->GetArgReg(); + + // Stack argument - if the ref count is 0 don't care about it + if (!varDsc->lvOnFrame) + { + noway_assert(varDsc->lvRefCnt() == 0); + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + if (varDsc->GetOtherArgReg() < REG_STK) + regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } + else + { + assert(srcRegNum != varDsc->GetOtherArgReg()); + + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; + + bool FPbased; + int baseOffset = 0;//(regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + + base += baseOffset; + + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + } + + regArgMaskLive &= ~genRegMask(srcRegNum); + + // Check if we are writing past the end of the struct + if (varTypeIsStruct(varDsc)) + { + if (emitter::isFloatReg(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(emitActualTypeSize(storeType)); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); + baseOffset = baseOffset < (int)size ? 
(int)size : baseOffset; + srcRegNum = varDsc->GetOtherArgReg(); + } + else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) + { + baseOffset = (int)EA_SIZE(slotSize); + if (varDsc->lvIs4Field2) + storeType = TYP_INT; + else + storeType = varDsc->GetLayout()->GetGCPtrType(1); + size = emitActualTypeSize(storeType); + if (baseOffset < (int)EA_SIZE(size)) + baseOffset = (int)EA_SIZE(size); + srcRegNum = varDsc->GetOtherArgReg(); + } + + if (srcRegNum == varDsc->GetOtherArgReg()) + { + base += baseOffset; + + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + } + } + regArgMaskLive &= ~genRegMask(srcRegNum);//maybe do this later is better! + } + else if (varDsc->lvIsSplit) + { + assert(varDsc->GetArgReg() == REG_ARG_LAST && varDsc->GetOtherArgReg() == REG_STK); + baseOffset = 8; + base += 8; + + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size/*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, -8); + } + else + { + baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, baseOffset); + } + } + } + } + +#ifdef USING_SCOPE_INFO + { + psiMoveToStack(varNum); + } +#endif // USING_SCOPE_INFO + } + } + + while (regArgNum > 0) + { + varNum = regArgsVars[regArgNum - 1]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) + { + var_types destMemType = varDsc->TypeGet(); + GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), varDsc->GetArgReg()); + regArgNum--; + regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); + } + else + { + for (int i=0; i < regArgNum; i++) + { + LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; + var_types destMemType = varDsc2->GetRegisterType(); + inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, emitActualTypeSize(destMemType)); + regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); + } + break; + } + } + + assert(!regArgMaskLive); + +} +#else //!defined(TARGET_LOONGARCH64) void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -4514,6 +4871,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop } } +#endif + #ifdef _PREFAST_ #pragma warning(pop) #endif @@ -4540,6 +4899,11 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; +#ifdef 
TARGET_LOONGARCH64 + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; +#endif + for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) { /* Is this variable a parameter? */ @@ -4586,8 +4950,39 @@ void CodeGen::genEnregisterIncomingStackArgs() assert(regNum != REG_STK); var_types regType = varDsc->GetActualRegisterType(); +#ifdef TARGET_LOONGARCH64 + { + bool FPbased; + //int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); + if ((-2048 <= base) && (base < 2048)) + { + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); + } + else + { + if (tmp_reg == REG_NA) + { + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); + } + else + { + int baseOffset = -(base - tmp_offset) - 8; + GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, baseOffset); + } + } + } +#else GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); +#endif // TARGET_LOONGARCH64 + regSet.verifyRegUsed(regNum); #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); @@ -5068,6 +5463,9 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + // We will just zero out the entire vector register. 
This sets it to a double/float zero value + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -5103,6 +5501,8 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value GetEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, reg, REG_R0); #else // TARGET* #error Unsupported or unset target architecture #endif @@ -5498,23 +5898,230 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) noway_assert(compiler->compCalleeRegsPushed == popCount); } -#endif // TARGET* - -// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. -// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR. -regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) +#elif defined(TARGET_LOONGARCH64) +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) { -#ifdef TARGET_ARM64 - return REG_ZR; -#else // !TARGET_ARM64 - if (*pInitRegZeroed == false) + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) { - instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); - *pInitRegZeroed = true; + rsRestoreRegs |= RBM_FPBASE; } - return initReg; -#endif // !TARGET_ARM64 -} + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. 
+ int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. + + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + frameType = 2; + + calleeSaveSPOffset = compiler->compLclFrameSize; + + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, dspBool(compiler->compLocallocUsed)); + + } + //calleeSaveSPDelta = 0; + } + else + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 3; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + //if (outSzAligned > 0) + { + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
+ + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + } + else + { + frameType = 4; + + JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + if (compiler->compLocallocUsed) + { + calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; + + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); + compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + } + } + else + { + // No frame pointer (no chaining). 
+ NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 3) + { + //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (frameType == 4) + { + //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } +} + +#endif // TARGET* + +// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. +// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR. 
+regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) +{ +#ifdef TARGET_ARM64 + return REG_ZR; +#elif defined(TARGET_LOONGARCH64) + return REG_R0; +#else // !TARGET_LOONGARCH64 + if (*pInitRegZeroed == false) + { + instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); + *pInitRegZeroed = true; + } + return initReg; +#endif // !TARGET_LOONGARCH64 +} //----------------------------------------------------------------------------- // genZeroInitFrame: Zero any untracked pointer locals and/or initialize memory for locspace @@ -6072,6 +6679,118 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, assert(i == alignmentHiBlkSize); } } + +#elif defined(TARGET_LOONGARCH64) + regNumber rAddr; + regNumber rCnt = REG_NA; // Invalid + regMaskTP regMask; + + regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers + //see: src/jit/registerloongarch64.h + availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are + // currently live + availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for + // a large constant. + + rAddr = initReg; + *pInitRegZeroed = false; + + // rAddr is not a live incoming argument reg + assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); + assert(untrLclLo%4 == 0); + + if ((-2048 <= untrLclLo) && (untrLclLo < 2048)) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); + } + else + { + // Load immediate into the InitReg register + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); + *pInitRegZeroed = false; + } + + bool useLoop = false; + unsigned uCntBytes = untrLclHi - untrLclLo; + assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes. 
+ unsigned int padding = untrLclLo & 0x7; + + if (padding) + { + assert(padding == 4); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, 0); + uCntBytes -= 4; + } + + unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use. + + // When uCntSlots is 9 or less, we will emit a sequence of sd instructions inline. + // When it is 10 or greater, we will emit a loop containing a sd instruction. + // In both of these cases the sd instruction will write two zeros to memory + // and we will use a single str instruction at the end whenever we have an odd count. + if (uCntSlots >= 10) + useLoop = true; + + if (useLoop) + { + // We pick the next lowest register number for rCnt + noway_assert(availMask != RBM_NONE); + regMask = genFindLowestBit(availMask); + rCnt = genRegNumFromMask(regMask); + availMask &= ~regMask; + + noway_assert(uCntSlots >= 2); + assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming + // argument reg + instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); + + /* TODO for LA: maybe optimize further */ + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rCnt, rCnt, -1); + + // bne rCnt, zero, -4 * 4 + ssize_t imm = -16; + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES); + GetEmitter()->emitIns_R_R_I(INS_bne, EA_PTRSIZE, rCnt, REG_R0, imm); + + uCntBytes %= REGSIZE_BYTES * 2; + } + else + { + while (uCntBytes >= REGSIZE_BYTES * 2) + { + /* TODO for LA: can be optimize further */ + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, 2 * REGSIZE_BYTES + padding); + 
uCntBytes -= REGSIZE_BYTES * 2; + padding = 0; + } + } + + if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number) + { + if ((uCntBytes - REGSIZE_BYTES) == 0) + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, padding); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, rAddr, REGSIZE_BYTES); + } + uCntBytes -= REGSIZE_BYTES; + } + if (uCntBytes > 0) + { + assert(uCntBytes == sizeof(int)); + GetEmitter()->emitIns_R_R_I(INS_st_w, EA_4BYTE, REG_R0, rAddr, padding); + uCntBytes -= sizeof(int); + } + noway_assert(uCntBytes == 0); + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6385,11 +7104,14 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed // ARM's emitIns_R_R_I automatically uses the reserved register if necessary. GetEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#else // !ARM64 !ARM +#elif defined(TARGET_LOONGARCH64) + genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), + compiler->lvaCachedGenericContextArgOffset(), REG_R21); +#else // !ARM64 !ARM !LOONGARCH64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), compiler->lvaCachedGenericContextArgOffset()); -#endif // !ARM64 !ARM +#endif // !ARM64 !ARM !LOONGARCH64 } /***************************************************************************** @@ -6772,6 +7494,23 @@ void CodeGen::genFinalizeFrame() maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED; #endif // defined(TARGET_XARCH) +#ifdef TARGET_LOONGARCH64 + if (isFramePointerUsed()) + { + // For a FP based frame we have to push/pop the FP register + // + maskCalleeRegsPushed |= RBM_FPBASE; + + // This assert check that we 
are not using REG_FP + // as both the frame pointer and as a codegen register + // + assert(!regSet.rsRegsModified(RBM_FPBASE)); + } + + // we always push RA. See genPushCalleeSavedRegisters + maskCalleeRegsPushed |= RBM_RA; +#endif // TARGET_LOONGARCH64 + compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed); #ifdef DEBUG @@ -6861,6 +7600,22 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) compiler->unwindSetFrameReg(REG_FPBASE, delta); } +#elif defined(TARGET_LOONGARCH64) + if (delta == 0) + { + GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE); + } + else + { + assert((-2048 <= delta) && (delta < 2048)); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); + } + + if (reportUnwindData) + { + compiler->unwindSetFrameReg(REG_FPBASE, delta); + } + #else NYI("establish frame pointer"); #endif @@ -6953,10 +7708,10 @@ void CodeGen::genFnProlog() instGen(INS_nop); instGen(INS_BREAKPOINT); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Avoid asserts in the unwind info because these instructions aren't accounted for. compiler->unwindPadding(); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 } #endif // DEBUG @@ -7134,11 +7889,11 @@ void CodeGen::genFnProlog() // previous frame pointer. Thus, stkOffs can't be zero. CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_AMD64) +#if !defined(TARGET_AMD64) && !defined(TARGET_LOONGARCH64) // However, on amd64 there is no requirement to chain frame pointers. 
noway_assert(!isFramePointerUsed() || loOffs != 0); -#endif // !defined(TARGET_AMD64) +#endif // !defined(TARGET_AMD64) && !defined(TARGET_LOONGARCH64) // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs); @@ -7298,9 +8053,9 @@ void CodeGen::genFnProlog() } #endif // TARGET_XARCH -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 genPushCalleeSavedRegisters(); #endif // !TARGET_ARM64 @@ -7333,7 +8088,7 @@ void CodeGen::genFnProlog() //------------------------------------------------------------------------- CLANG_FORMAT_COMMENT_ANCHOR; -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) regMaskTP maskStackAlloc = RBM_NONE; #ifdef TARGET_ARM @@ -7345,7 +8100,7 @@ void CodeGen::genFnProlog() { genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 //------------------------------------------------------------------------- @@ -7485,6 +8240,13 @@ void CodeGen::genFnProlog() // if (!compiler->opts.IsOSR()) { +#if defined(TARGET_LOONGARCH64) + if (intRegState.rsCalleeRegArgMaskLiveIn || floatRegState.rsCalleeRegArgMaskLiveIn) + { + initRegZeroed = false; + genFnPrologCalleeRegArgs(); + } +#else auto assignIncomingRegisterArgs = [this, initReg, &initRegZeroed](RegState* regState) { if (regState->rsCalleeRegArgMaskLiveIn) { @@ -7519,6 +8281,7 @@ void CodeGen::genFnProlog() assignIncomingRegisterArgs(&floatRegState); #else assignIncomingRegisterArgs(&intRegState); +#endif #endif } @@ -8453,6 +9216,192 @@ void CodeGen::genFnEpilog(BasicBlock* block) } } +#elif defined(TARGET_LOONGARCH64) + +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFnEpilog()\n"); +#endif // DEBUG + + ScopedSetVariable 
_setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n__epilog:\n"); + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && lastNode->gtOper == GT_JMP) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. 
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + //if (validImmForBAL((ssize_t)addrInfo.addr)) + //{ + // // Simple direct call + + // //TODO for LA. + // callType = emitter::EC_FUNC_TOKEN; + // addr = addrInfo.addr; + // indCallReg = REG_NA; + // break; + //} + + //// otherwise the target address doesn't fit in an immediate + //// so we have to burn a register... + //__fallthrough; + + case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. 
+ callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. + */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + // Fast tail call. + // Call target = REG_FASTTAILCALL_TARGET + // https://github.com/dotnet/coreclr/issues/4827 + // Do we need a special encoding for stack walker like rex.w prefix for x64? + + //TODO for LA: whether the relative address is enough for optimize? 
+ GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -9145,6 +10094,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } } +#elif defined(TARGET_LOONGARCH64) + +// Look in codegenloongarch64.cpp + #else // TARGET* /***************************************************************************** @@ -9329,6 +10282,18 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0); +#elif defined(TARGET_LOONGARCH64) + + int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); + + // We will just use the initReg since it is an available register + // and we are probably done using it anyway... 
+ regNumber regTmp = initReg; + *pInitRegZeroed = false; + + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, REG_R21, false); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); + #else // TARGET* NYI("Set function PSP sym"); @@ -9582,7 +10547,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -9744,7 +10709,7 @@ instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shi // unsigned CodeGen::getFirstArgWithStackSlot() { -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned baseVarNum = 0; // Iterate over all the lvParam variables in the Lcl var table until we find the first one // that's passed on the stack. @@ -10907,9 +11872,9 @@ void CodeGen::genReturn(GenTree* treeNode) // exit point where it is actually dead. genConsumeReg(op1); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genSimpleReturn(treeNode); -#else // !TARGET_ARM64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 #if defined(TARGET_X86) if (varTypeUsesFloatReg(treeNode)) { @@ -10937,7 +11902,7 @@ void CodeGen::genReturn(GenTree* treeNode) regNumber retReg = varTypeUsesFloatReg(treeNode) ? 
REG_FLOATRET : REG_INTRET; inst_Mov_Extend(targetType, /* srcInReg */ true, retReg, op1->GetRegNum(), /* canSkip */ true); } -#endif // !TARGET_ARM64 +#endif // !TARGET_ARM64 || !TARGET_LOONGARCH64 } } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index f276a492da33d..84d8560545894 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -114,6 +114,8 @@ class CodeGenInterface static const insFlags instInfo[INS_count]; #elif defined(TARGET_ARM) || defined(TARGET_ARM64) static const BYTE instInfo[INS_count]; +#elif defined(TARGET_LOONGARCH64) + static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture #endif @@ -196,6 +198,11 @@ class CodeGenInterface virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const = 0; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value) = 0; + virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const = 0; +#endif // TARGET_LOONGARCH64 + regNumber genGetThisArgReg(GenTreeCall* call) const; #ifdef TARGET_XARCH @@ -305,7 +312,11 @@ class CodeGenInterface bool validImmForAdd(target_ssize_t imm, insFlags flags); bool validImmForAlu(target_ssize_t imm); bool validImmForMov(target_ssize_t imm); +#ifdef TARGET_LOONGARCH64 + bool validImmForBAL(ssize_t addr); +#else bool validImmForBL(ssize_t addr); +#endif instruction ins_Load(var_types srcType, bool aligned = false); instruction ins_Store(var_types dstType, bool aligned = false); @@ -360,7 +371,7 @@ class CodeGenInterface m_cgInterruptible = value; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool GetHasTailCalls() { @@ -374,7 +385,7 @@ class CodeGenInterface private: bool m_cgInterruptible; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool m_cgHasTailCalls; #endif // TARGET_ARMARCH diff --git a/src/coreclr/jit/codegenlinear.cpp 
b/src/coreclr/jit/codegenlinear.cpp index 51e9afc074398..e10e498466f52 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1211,7 +1211,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(spillType != TYP_UNDEF); // TODO-Cleanup: The following code could probably be further merged and cleaned up. -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Load local variable from its home location. // In most cases the tree type will indicate the correct type to use for the load. // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets @@ -2515,7 +2515,12 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) m_checkKind = CHECK_NONE; } +#ifdef TARGET_LOONGARCH64 + m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; + cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; +#else m_extendKind = COPY; +#endif m_extendSrcSize = 4; } #endif @@ -2592,6 +2597,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) } #endif // !defined(TARGET_64BIT) +#ifndef TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJumpTrue: Generate code for a GT_JTRUE node. // @@ -2637,6 +2643,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) inst_JCC(condition, compiler->compCurBB->bbJumpDest); } +#endif // !TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJcc: Generate code for a GT_JCC node. diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp new file mode 100644 index 0000000000000..49cc67b50dc36 --- /dev/null +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -0,0 +1,9157 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX LOONGARCH64 Code Generator XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 +#include "emit.h" +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "gcinfoencoder.h" + +static short splitLow(int value) { + return (value & 0xffff); +} + +// Returns true if 'value' is a legal signed immediate 16 bit encoding. +static bool isValidSimm16(ssize_t value) +{ + return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); +}; + +// Returns true if 'value' is a legal unsigned immediate 16 bit encoding. +static bool isValidUimm16(ssize_t value) +{ + return (0 == (value >> 16)); +}; + +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); +}; + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +}; + +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. 
+static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +}; + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +//------------------------------------------------------------------------ +// genInstrWithConstant: we will typically generate one instruction +// +// ins reg1, reg2, imm +// +// However the imm might not fit as a directly encodable immediate, +// when it doesn't fit we generate extra instruction(s) that sets up +// the 'regTmp' with the proper immediate value. +// +// li64 regTmp, imm +// ins reg1, reg2, regTmp +// +// Arguments: +// ins - instruction +// attr - operation size and GC attribute +// reg1, reg2 - first and second register operands +// imm - immediate value (third operand when it fits) +// tmpReg - temp register to use when the 'imm' doesn't fit. Can be REG_NA +// if caller knows for certain the constant will fit. +// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes. +// Default: false. +// +// Return Value: +// returns true if the immediate was small enough to be encoded inside instruction. If not, +// returns false meaning the immediate was too large and tmpReg was used and modified. 
+// +bool CodeGen::genInstrWithConstant(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + regNumber tmpReg, + bool inUnwindRegion /* = false */) +{ + emitAttr size = EA_SIZE(attr); + + // reg1 is usually a dest register + // reg2 is always source register + assert(tmpReg != reg2); // regTmp can not match any source register + +#ifdef DEBUG + switch (ins) + { + case INS_addi_d: + + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_fst_s: + case INS_st_d: + case INS_fst_d: + + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_fld_s: + case INS_ld_d: + case INS_fld_d: + break; + + default: + assert(!"Unexpected instruction in genInstrWithConstant"); + break; + } +#endif + bool immFitsInIns = (-2048 <= imm) && (imm <= 2047); + + if (immFitsInIns) + { + // generate a single instruction that encodes the immediate directly + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit + assert(tmpReg != REG_NA); + + // generate two or more instructions + + // first we load the immediate into tmpReg + assert(!EA_IS_RELOC(size)); + GetEmitter()->emitIns_I_la(size, tmpReg, imm); + regSet.verifyRegUsed(tmpReg); + + // when we are in an unwind code region + // we record the extra instructions using unwindPadding() + if (inUnwindRegion) + { + compiler->unwindPadding(); + } + + if (ins == INS_addi_d) + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, reg1, reg2, tmpReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, tmpReg, reg2, tmpReg); + GetEmitter()->emitIns_R_R_I(ins, attr, reg1, tmpReg, 0); + } + } + return immFitsInIns; +} + +//------------------------------------------------------------------------ +// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog +// or the epilog. The unwind codes for the generated instructions are produced. 
An available temporary +// register is required to be specified, in case the constant is too large to encode in an "daddu" +// instruction (or "dsubu" instruction if we choose to use one), such that we need to load the constant +// into a register first, before using it. +// +// Arguments: +// spDelta - the value to add to SP (can be negative) +// tmpReg - an available temporary register +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// reportUnwindData - If true, report the change in unwind data. Otherwise, do not report it. +// +// Return Value: +// None. + +void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData) +{ + // Even though INS_addi_d is specified here, the encoder will choose either + // an INS_add_d or an INS_addi_d and encode the immediate as a positive value + // + bool wasTempRegisterUsedForImm = + !genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true); + if (wasTempRegisterUsedForImm) + { + if (pTmpRegIsZero != nullptr) + { + *pTmpRegIsZero = false; + } + } + + if (reportUnwindData) + { + // spDelta is negative in the prolog, positive in the epilog, + // but we always tell the unwind codes the positive value. + ssize_t spDeltaAbs = abs(spDelta); + unsigned unwindSpDelta = (unsigned)spDeltaAbs; + assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in a unsigned + + compiler->unwindAllocStack(unwindSpDelta); + } +} + +//------------------------------------------------------------------------ +// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet +// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction. 
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to save. +// reg2 - Second register of pair to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genPrologSaveRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fst_d; + + if (spDelta != 0) + { + // generate addi.d SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + + assert((spDelta+spOffset+16)<=0); + + assert(spOffset <= 2031);//2047-16 + } + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_st_d == ins)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); +// compiler->unwindSaveRegPair(reg1, reg2, spOffset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + 
GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); + compiler->unwindSaveReg(reg2, spOffset+8); + } +} + +//------------------------------------------------------------------------ +// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or +// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), +// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not +// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it +// (and this function changes to support using pre-indexed SD addressing). The caller must ensure that we can use the +// SD instruction, and that spOffset will be in the legal range for that instruction. +// +// Arguments: +// reg1 - Register to save. +// spOffset - The offset from SP to store reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. 
+ +void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta <= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_st_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fst_d; + + if (spDelta != 0) + { + // generate daddiu SP,SP,-imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + +} + +//------------------------------------------------------------------------ +// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog. +// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing. +// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that +// instruction. +// +// Arguments: +// reg1 - First register of pair to restore. +// reg2 - Second register of pair to restore. +// spOffset - The offset from SP to load reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// useSaveNextPair - True if the last prolog instruction was to save the previous register pair. This +// allows us to emit the "save_next" unwind code. +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. 
+ +void CodeGen::genEpilogRestoreRegPair(regNumber reg1, + regNumber reg2, + int spOffset, + int spDelta, + bool useSaveNextPair, + regNumber tmpReg, + bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both + // FP/SIMD + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fld_d; + + if (spDelta != 0) + { + assert(!useSaveNextPair); +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); +// compiler->unwindSaveRegPair(reg1, reg2, spOffset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); + compiler->unwindSaveReg(reg2, spOffset+8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); +// compiler->unwindSaveRegPair(reg1, reg2, spOffset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); + compiler->unwindSaveReg(reg2, spOffset+8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } + } +} + +//------------------------------------------------------------------------ +// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog. 
+// +// Arguments: +// reg1 - Register to restore. +// spOffset - The offset from SP to restore reg1 (must be positive or zero). +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// tmpReg - An available temporary register. Needed for the case of large frames. +// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'. +// Otherwise, we don't touch it. +// +// Return Value: +// None. + +void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero) +{ + assert(spOffset >= 0); + assert(spDelta >= 0); + assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned + + instruction ins = INS_ld_d; + if (genIsValidFloatReg(reg1)) + ins = INS_fld_d; + + if (spDelta != 0) + { + // ld reg1, offset(SP) + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + + // generate add SP,SP,imm + genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); + } + else + { + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); + compiler->unwindSaveReg(reg1, spOffset); + } +} + +//------------------------------------------------------------------------ +// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask. +// The first register pair will contain the lowest register. Register pairs will combine neighbor +// registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with +// odd number of regs) then the second element of that RegPair will be REG_NA. +// +// Arguments: +// regsMask - a mask of registers for prolog/epilog generation; +// regStack - a regStack instance to build the stack in, used to save temp copyings. +// +// Return value: +// no return value; the regStack argument is modified. 
+//
+// static
+void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
+{
+    assert(regStack != nullptr);
+    assert(regStack->Height() == 0);
+
+    unsigned regsCount = genCountBits(regsMask);
+
+    while (regsMask != RBM_NONE)
+    {
+        regMaskTP reg1Mask = genFindLowestBit(regsMask);
+        regNumber reg1     = genRegNumFromMask(reg1Mask);
+        regsMask &= ~reg1Mask;
+        regsCount -= 1;
+
+        bool isPairSave = false;
+        if (regsCount > 0)
+        {
+            regMaskTP reg2Mask = genFindLowestBit(regsMask);
+            regNumber reg2     = genRegNumFromMask(reg2Mask);
+            if (reg2 == REG_NEXT(reg1))
+            {
+                // The JIT doesn't allow saving pair (S7,FP), even though the
+                // save_regp register pair unwind code specification allows it.
+                // The JIT always saves (FP,RA) as a pair, and uses the save_fpra
+                // unwind code. This only comes up in stress mode scenarios
+                // where callee-saved registers are not allocated completely
+                // from lowest-to-highest, without gaps.
+                if (reg1 != REG_FP)
+                {
+                    // Both registers must have the same type to be saved as pair.
+                    if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2))
+                    {
+                        isPairSave = true;
+
+                        regsMask &= ~reg2Mask;
+                        regsCount -= 1;
+
+                        regStack->Push(RegPair(reg1, reg2));
+                    }
+                }
+            }
+        }
+        if (!isPairSave)
+        {
+            regStack->Push(RegPair(reg1));
+        }
+    }
+    assert(regsCount == 0 && regsMask == RBM_NONE);
+
+    genSetUseSaveNextPairs(regStack);
+}
+
+//------------------------------------------------------------------------
+// genSetUseSaveNextPairs: Set useSaveNextPair for each RegPair on the stack which unwind info can be encoded as
+// save_next code.
+//
+// Arguments:
+//   regStack - a regStack instance to set useSaveNextPair.
+//
+// Notes:
+// We can use save_next for RegPair(N, N+1) only when we have sequence like (N-2, N-1), (N, N+1).
+// In this case in the prolog save_next for (N, N+1) refers to save_pair(N-2, N-1);
+// in the epilog the unwinder will search for the first save_pair (N-2, N-1)
+// and then go back to the first save_next (N, N+1) to restore it first.
+//
+// static
+void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
+{
+    for (int i = 1; i < regStack->Height(); ++i)
+    {
+        RegPair& curr = regStack->BottomRef(i);
+        RegPair  prev = regStack->Bottom(i - 1);
+
+        if (prev.reg2 == REG_NA || curr.reg2 == REG_NA)
+        {
+            continue;
+        }
+
+        if (REG_NEXT(prev.reg2) != curr.reg1)
+        {
+            continue;
+        }
+
+        if (genIsValidFloatReg(prev.reg2) != genIsValidFloatReg(curr.reg1))
+        {
+            // It is possible to support changing of the last int pair with the first float pair,
+            // but it is very rare case and it would require superfluous changes in the unwinder.
+            continue;
+        }
+        curr.useSaveNextPair = true;
+    }
+}
+
+//------------------------------------------------------------------------
+// genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog/epilog generation.
+//
+// Return value:
+//   stack slot size in bytes.
+//
+// Note: Because int and float register type sizes match we can call this function with a mask that includes both.
+//
+// static
+int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
+{
+    assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_RA)) == regsMask); // Do not expect anything else.
+
+    static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+    return REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog generation;
+//   spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
+//   spOffset - the offset from SP that is the beginning of the callee-saved register area;
+//
+void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
+{
+    const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
+
+    ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
+    genBuildRegPairsStack(regsMask, &regStack);
+
+    for (int i = 0; i < regStack.Height(); ++i)
+    {
+        RegPair regPair = regStack.Bottom(i);
+        if (regPair.reg2 != REG_NA)
+        {
+            // We can use two SD instructions.
+            genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_R21,
+                                 nullptr);
+
+            spOffset += 2 * slotSize;
+        }
+        else
+        {
+            // No register pair; we use a SD instruction.
+            genPrologSaveReg(regPair.reg1, spOffset, spDelta, REG_R21, nullptr);
+            spOffset += slotSize;
+        }
+
+        spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+    }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. Registers are saved in register number order from low addresses
+// to high addresses. This means that integer registers are saved at lower addresses than floating-point/SIMD
+// registers.
+//
+// If establishing frame pointer chaining, it must be done after saving the callee-saved registers.
+//
+// We can only use the instructions that are allowed by the unwind codes. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (SD) are encodable with the stack-pointer immediate offsets we need to use.
+//
+// The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction.
+// Note that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8 and the stack pointer
+// adjustment must be done by a SUB, and not folded in to a pre-indexed store.
+//
+// Arguments:
+//    regsToSaveMask          - The mask of callee-saved registers to save. If empty, this function does nothing.
+//    lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+//                              if non-zero spDelta, then this is the offset of the first save *after* that
+//                              SP adjustment.
+//    spDelta                 - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                              zero).
+//
+// Notes:
+//    The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers.
+//
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+{
+    assert(spDelta <= 0);
+
+    unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+    if (regsToSaveCount == 0)
+    {
+        if (spDelta != 0)
+        {
+            // Currently this is the case for varargs only
+            // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+            genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true);
+        }
+        return;
+    }
+
+    assert((spDelta % 16) == 0);
+
+    assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED));
+
+    // Save integer registers at higher addresses than floating-point registers.
+ + regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat; + + if (maskSaveRegsFloat != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset); + spDelta = 0; + lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES; + } + + if (maskSaveRegsInt != RBM_NONE) + { + genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset); + // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this. + } +} + +//------------------------------------------------------------------------ +// genRestoreCalleeSavedRegisterGroup: Restores the group of registers described by the mask. +// +// Arguments: +// regsMask - a mask of registers for epilog generation; +// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it); +// spOffset - the offset from SP that is the beginning of the callee-saved register area; +// +void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset) +{ + const int slotSize = genGetSlotSizeForRegsInMask(regsMask); + + ArrayStack regStack(compiler->getAllocator(CMK_Codegen)); + genBuildRegPairsStack(regsMask, ®Stack); + + int stackDelta = 0; + for (int i = 0; i < regStack.Height(); ++i) + { + bool lastRestoreInTheGroup = (i == regStack.Height() - 1); + bool updateStackDelta = lastRestoreInTheGroup && (spDelta != 0); + if (updateStackDelta) + { + // Update stack delta only if it is the last restore (the first save). 
+ assert(stackDelta == 0); + stackDelta = spDelta; + } + + RegPair regPair = regStack.Top(i); + if (regPair.reg2 != REG_NA) + { + spOffset -= 2 * slotSize; + + genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_R21, + nullptr); + } + else + { + spOffset -= slotSize; + genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_R21, nullptr); + } + } +} + +//------------------------------------------------------------------------ +// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame +// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp(). +// +// Arguments: +// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing. +// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. +// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or +// zero). +// +// Here's an example restore sequence: +// ld s7, 88(sp) +// ld s6, 80(sp) +// ld s5, 72(sp) +// ld s4, 64(sp) +// ld s3, 56(sp) +// ld s2, 48(sp) +// ld s1, 40(sp) +// ld s0, 32(sp) +// +// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and +// the last restore adjusts SP by the specified amount. For example: +// ld s7, 56(sp) +// ld s6, 48(sp) +// ld s5, 40(sp) +// ld s4, 32(sp) +// ld s3, 24(sp) +// ld s2, 16(sp) +// ld s1, 88(sp) +// ld s0, 80(sp) +// +// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when +// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store. +// +// Return Value: +// None. 
+ +void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta) +{ + assert(spDelta >= 0); + unsigned regsToRestoreCount = genCountBits(regsToRestoreMask); + if (regsToRestoreCount == 0) + { + if (spDelta != 0) + { + // Currently this is the case for varargs only + // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes. + genStackPointerAdjustment(spDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + return; + } + + assert((spDelta % 16) == 0); + + // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED. + assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA)); + + // Point past the end, to start. We predecrement to find the offset to load from. + static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES); + int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; + + // Save integer registers at higher addresses than floating-point registers. + + regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat; + + // Restore in the opposite order of saving. + + if (maskRestoreRegsInt != RBM_NONE) + { + int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment? + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset); + spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES; + } + + if (maskRestoreRegsFloat != RBM_NONE) + { + // If there is any spDelta, it must be used here. + genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset); + // No need to update spOffset since it's not used after this. + } +} + +// clang-format off +/***************************************************************************** + * + * Generates code for an EH funclet prolog. 
+ * + * Funclets have the following incoming arguments: + * + * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) + * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * finally/fault: none + * + * Funclets set the following registers on exit: + * + * catch: v0 = the address at which execution should resume (see BBJ_EHCATCHRET) + * filter: v0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) + * finally/fault: none + * + * The LOONGARCH64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size, + * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16): + * + * Frame type 1: + * For #framesz <= 32760 and FP/RA at bottom: + * daddiu sp,sp,-#framesz ; establish the frame (predecrement by #framesz), save FP/RA + * sd fp,#outsz(sp) + * sd ra,#outsz+8(sp) + * sd s0,#xxx-8(sp) ; save callee-saved registers, as necessary + * sd s1,#xxx(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. 
+ * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 2: + * For #framesz <= 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz ; establish the frame + * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 3: + * For #framesz > 32760 and FP/RA at bottom: + * ; for funclet, #framesz-#outsz will be less than 32760. + * + * daddiu sp,sp,-(#framesz-#FPRA_delta) ; note maybe 16byte-alignment. + * sd fp, pad(sp) ; pad is depended on stack-16byte-alignment.. + * sd ra, pad+8(sp) + * sd s0,#xxx(sp) ; save callee-saved registers, as necessary, + * sd s1,#xxx+8(sp) + * daddiu sp,sp,-#outsz ; create space for outgoing argument space, mabye 16byte-alignment. 
+ * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * Frame type 4: + * For #framesz > 32760 and FP/RA at top: + * daddiu sp,sp,-#framesz+PSP_offset ; establish the frame, maybe 16byte-alignment. + * sd s0,xxx(sp) ; save callee-saved registers, as necessary + * sd s1,xxx+8(sp) + * sd s?,xxx+?(sp) + * sd fp,xxx+?(sp) ; save FP/RA. + * sd ra,xxx+?(sp) + * + * daddiu sp,sp,-#PSP_offset ; establish the frame, maybe 16byte-alignment. + * + * The funclet frame is thus: + * + * | | + * |-----------------------| + * | incoming arguments | + * +=======================+ <---- Caller's SP + * | Varargs regs space | // Only for varargs main functions; 64 bytes + * |-----------------------| + * | Saved FP, RA | // 16 bytes + * |-----------------------| + * |Callee saved registers | // multiple of 8 bytes + * |-----------------------| + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) + * |-----------------------| + * ~ alignment padding ~ // To make the whole frame 16 byte aligned. + * |-----------------------| + * | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + * |-----------------------| <---- Ambient SP + * | | | + * ~ | Stack grows ~ + * | | downward | + * V + * + * + * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. 
For the general case, #3, + * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack + * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 224 bytes: + * + * FP,RA registers + * 8 int callee-saved register s0-s7 + * 8 float callee-saved registers f24-f31 + * 8 saved integer argument registers a0-a7, if varargs function + * 1 PSP slot + * 1 alignment slot, future maybe add gp + * == 28 slots * 8 bytes = 224 bytes. + * + * The outgoing argument size, however, can be very large, if we call a function that takes a large number of + * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main + * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of + * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before + * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset + * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument + * space. Both changes to SP might need to add alignment padding. + * + * In addition to the above "standard" frames, we also need to support a frame where the saved FP/RA are at the + * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/RA + * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. + * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/RA + * changed, but where the alignment padding is placed has also changed. 
+ * + * + * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP + * as in the main function. + * + * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we + * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * + * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. + * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. + * + * if (this is a filter funclet) + * { + * // a1 on entry to a filter funclet is CallerSP of the containing function: + * // either the main function, or the funclet for a handler that this filter is dynamically nested within. + * // Note that a filter can be dynamically nested within a funclet even if it is not statically within + * // a funclet. Consider: + * // + * // try { + * // try { + * // throw new Exception(); + * // } catch(Exception) { + * // throw new Exception(); // The exception thrown here ... + * // } + * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack + * // } filter-handler { + * // } + * // + * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will + * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always + * // create a main function PSP for any function with a filter. 
+ * + * ld a1, CallerSP_to_PSP_slot_delta(a1) ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) + * sd a1, SP_to_PSP_slot_delta(sp) ; store the PSP + * daddiu fp, a1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer + * } + * else + * { + * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. + * // TODO-LOONGARCH64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. + * + * daddiu a3, fp, Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? + * sd a3, SP_to_PSP_slot_delta(sp) ; store the PSP + * } + * + * An example epilog sequence is then: + * + * daddiu sp,sp,#outsz ; if any outgoing argument space + * ... ; restore callee-saved registers + * ld s0,#xxx-8(sp) + * ld s1,#xxx(sp) + * ld fp,#framesz-8(sp) + * ld ra,#framesz(sp) + * daddiu sp,sp,#framesz + * jr ra + * + */ +// clang-format on + +void CodeGen::genFuncletProlog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletProlog()\n"); +#endif + + assert(block != NULL); + assert(block->bbFlags & BBF_FUNCLET_BEG); + + ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); + + gcInfo.gcResetForBB(); + + compiler->unwindBegProlog(); + + regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. 
+ assert((maskSaveRegsInt & RBM_RA) != 0); + assert((maskSaveRegsInt & RBM_FP) != 0); + + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; + + regMaskTP maskArgRegsLiveIn; + if (isFilter) + { + maskArgRegsLiveIn = RBM_A0 | RBM_A1; + } + else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) + { + maskArgRegsLiveIn = RBM_NONE; + } + else + { + maskArgRegsLiveIn = RBM_A0; + } + +#ifdef DEBUG + if (compiler->opts.disAsm) + printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); +#endif + + int offset = 0; + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= -2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + } + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize < 0); + assert(frameSize >= 
-2048); + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + } + else if (genFuncletInfo.fiFrameType == 3) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offset & 0xf) && (offset <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + } + + maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now + + offset = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 4) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); + + genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* 
reportUnwindData */ true);
+ }
+ else
+ {
+ unreached();
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ // If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the funclet frame.
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ if (isFilter)
+ {
+ // This is the first block of a filter
+ // Note that register a1 = CallerSP of the containing function
+ // A1 is overwritten by the first Load (new callerSP)
+ // A2 is scratch when we have a large constant offset
+
+ // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or
+ // function)
+ genInstrWithConstant(INS_ld_d, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta,
+ REG_A2, false);
+ regSet.verifyRegUsed(REG_A1);
+
+ // Store the PSP value (aka CallerSP)
+ genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2,
+ false);
+
+ // re-establish the frame pointer
+ genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1,
+ genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false);
+ }
+ else // This is a non-filter funclet
+ {
+ // A3 is scratch, A2 can also become scratch.
+
+ // compute the CallerSP, given the frame pointer. a3 is scratch?
+ genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_A3, REG_FPBASE,
+ -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false);
+ regSet.verifyRegUsed(REG_A3);
+
+ genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2,
+ false);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */ + +void CodeGen::genFuncletEpilog() +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFuncletEpilog()\n"); +#endif + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + bool unwindStarted = false; + int frameSize = genFuncletInfo.fiSpDelta1; + + if (!unwindStarted) + { + // We can delay this until we know we'll generate an unwindable instruction, if necessary. + compiler->unwindBegEpilog(); + unwindStarted = true; + } + + regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT; + regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat; + + // Funclets must always save RA and FP, since when we have funclets we must have an FP frame. + assert((maskRestoreRegsInt & RBM_RA) != 0); + assert((maskRestoreRegsInt & RBM_FP) != 0); + +#ifdef DEBUG + if (compiler->opts.disAsm) + printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType); +#endif + + regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; + + assert(frameSize < 0); + if (genFuncletInfo.fiFrameType == 1) + { + // fiFrameType constraints: + assert(frameSize >= -2048); + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + + regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + +// #ifdef OPTIMIZE_LOONGSON_EXT +// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); +// } +// else +// #endif + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + compiler->unwindSaveReg(REG_RA, 
genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); + } + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 2) + { + // fiFrameType constraints: + assert(frameSize >= -2048); + assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); + + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + + // generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (genFuncletInfo.fiFrameType == 3) + { + // fiFrameType constraints: + assert(frameSize < -2048); + + + int offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + int SP_delta = roundUp((UINT)offset, STACK_ALIGN); + offset = SP_delta - offset; + + //first, generate daddiu SP,SP,imm + genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); + + int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; + assert(offset2 < 2040);//can amend. 
+
+ regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0);
+
+// #ifdef OPTIMIZE_LOONGSON_EXT
+// if (!(offset & 0xf) && (offset <= 0xff0))
+// {
+// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4);
+// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset);
+// }
+// else
+// #endif
+ {
+ GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
+ compiler->unwindSaveReg(REG_RA, offset + 8);
+
+ GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
+ compiler->unwindSaveReg(REG_FP, offset);
+ }
+ //second, generate daddiu SP,SP,imm for remaining space.
+ genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true);
+ }
+ else if (genFuncletInfo.fiFrameType == 4)
+ {
+ // fiFrameType constraints:
+ assert(frameSize < -2048);
+
+ int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8);
+ int SP_delta = roundUp((UINT)offset, STACK_ALIGN);
+ offset = SP_delta - offset;
+
+ genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true);
+
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset, 0);
+
+ genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true);
+ }
+ else
+ {
+ unreached();
+ }
+ GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0);
+ compiler->unwindReturn(REG_RA);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ * + * See genFuncletProlog() for more information about the prolog/epilog sequences. + */ + +void CodeGen::genCaptureFuncletPrologEpilogInfo() +{ + if (!compiler->ehAnyFunclets()) + return; + + assert(isFramePointerUsed()); + + // The frame size and offsets must be finalized + assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); + + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); + + regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; + assert((rsMaskSaveRegs & RBM_RA) != 0); + assert((rsMaskSaveRegs & RBM_FP) != 0); + + unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 8 : 0; + + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); + + unsigned saveRegsPlusPSPSize; + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize/* -2*8*/; + else + saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize; + + if (compiler->info.compIsVarArgs) + { + // For varargs we always save all of the integer register arguments + // so that they are contiguous with the incoming stack arguments. 
+ saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; + } + unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + + assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); + unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); + + unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0); + + int SP_to_FPRA_save_delta = compiler->lvaOutgoingArgSpaceSize; + + unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN); + assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned); + + unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; + assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); + + if (maxFuncletFrameSizeAligned <= (2048-8)) + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2*8;// FP/RA + } + else + { + genFuncletInfo.fiFrameType = 2; + SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; + } + } + else + { + unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; + assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + genFuncletInfo.fiFrameType = 3; + saveRegsPlusPSPSize -= 2*8;// FP/RA + } + else + { + genFuncletInfo.fiFrameType = 4; + SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; + } + } + + + int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + + /* Now save it for future use */ + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; 
+ + genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; + genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + +#ifdef DEBUG + if (verbose) + { + printf("\n"); + printf("Funclet prolog / epilog info\n"); + printf(" Save regs: "); + dspRegMask(genFuncletInfo.fiSaveRegs); + printf("\n"); + printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); + printf(" SP to FP/RA save location delta: %d\n", genFuncletInfo.fiSP_to_FPRA_save_delta); + printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); + printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + if (CallerSP_to_PSP_slot_delta != + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + { + printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + } + } + } + + assert(genFuncletInfo.fiSP_to_FPRA_save_delta >= 0); +#endif // DEBUG +} + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX End Prolog / Epilog XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +BasicBlock* CodeGen::genCallFinally(BasicBlock* block) +{ + // Generate a call to the finally, like this: + // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used + // bl finally-funclet + // b finally-return // Only for non-retless finally calls + // The 'b' can be a NOP if we're going to the next block. 
+ + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); + } + else + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); + } + GetEmitter()->emitIns_J(INS_bl, block->bbJumpDest); + + if (block->bbFlags & BBF_RETLESS_CALL) + { + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. + + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + instGen(INS_break); // This should never get executed + } + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + GetEmitter()->emitDisableGC(); + + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) + { + // Fall-through. + // TODO-LOONGARCH64-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); + } + else + { + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); + } + + GetEmitter()->emitEnableGC(); + } + + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. 
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + block = block->bbNext; + } + return block; +} + +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); +} + +// move an immediate value into an integer register +void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, + regNumber reg, + ssize_t imm, + insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{//maybe optimize. + emitter* emit = GetEmitter(); + + if (!compiler->opts.compReloc) + { + size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs. + } + + if (EA_IS_RELOC(size)) + { + assert(genIsValidIntReg(reg));//TODO: maybe optimize!!! + emit->emitIns_R_AI(INS_bl, size, reg, imm);//for example: EA_PTR_DSP_RELOC + } + else + { + emit->emitIns_I_la(size, reg, imm);//TODO: maybe optimize. + } + + regSet.verifyRegUsed(reg); +} + +/*********************************************************************************** + * + * Generate code to set a register 'targetReg' of type 'targetType' to the constant + * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call + * genProduceReg() on the target register. 
+ */ +void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree) +{ + switch (tree->gtOper) + { + case GT_CNS_INT: + { + // relocatable values tend to come down as a CNS_INT of native int type + // so the line between these two opcodes is kind of blurry + GenTreeIntConCommon* con = tree->AsIntConCommon(); + ssize_t cnsVal = con->IconValue(); + + //if (con->ImmedValNeedsReloc(compiler)) + if (con->ImmedValNeedsReloc(compiler) && compiler->opts.compReloc) + { + //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + assert(compiler->opts.compReloc); + GetEmitter()->emitIns_R_AI(INS_bl, EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + regSet.verifyRegUsed(targetReg); + } + else + { + genSetRegToIcon(targetReg, cnsVal, targetType); + } + } + break; + + case GT_CNS_DBL: + { + emitter* emit = GetEmitter(); + emitAttr size = emitActualTypeSize(tree); + double constValue = tree->AsDblCon()->gtDconVal; + + // Make sure we use "daddiu reg, zero, 0x00" only for positive zero (0.0) + // and not for negative zero (-0.0) + if (*(__int64*)&constValue == 0) + { + // A faster/smaller way to generate 0.0 + // We will just zero out the entire vector register for both float and double + emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, REG_R0); + } + /*else if (emitter::emitIns_valid_imm_for_fmov(constValue)) + {// LOONGARCH64 doesn't need this. + assert(!"unimplemented on LOONGARCH yet"); + }*/ + else + { + // Get a temp integer register to compute long address. + //regNumber addrReg = tree->GetSingleTempReg(); + + // We must load the FP constant from the constant pool + // Emit a data section constant for the float or double constant. + CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size); + + // Load the FP constant. + assert(targetReg >= REG_F0); + + instruction ins = size == EA_4BYTE ? INS_fld_s : INS_fld_d; + + // Compute the address of the FP constant and load the data. 
+ emit->emitIns_R_C(ins, size, targetReg, REG_NA, hnd, 0); + } + } + break; + + default: + unreached(); + } +} + +// Produce code for a GT_INC_SATURATE node. +void CodeGen::genCodeForIncSaturate(GenTree* tree) +{ + assert(!"unimplemented on LOONGARCH yet"); +#if 0 + regNumber targetReg = tree->GetRegNum(); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(!tree->isContained()); + // The dst can only be a register. + assert(targetReg != REG_NA); + + GenTree* operand = tree->gtGetOp1(); + assert(!operand->isContained()); + // The src must be a register. + regNumber operandReg = genConsumeReg(operand); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(tree), targetReg, operandReg, 1); + GetEmitter()->emitIns_R_R_I(INS_bne, emitActualTypeSize(tree), targetReg, REG_R0, 2); + GetEmitter()->emitIns_R_R_R(INS_andn, emitActualTypeSize(tree), targetReg, targetReg, REG_R0); + + genProduceReg(tree); +#endif +} + +// Generate code to get the high N bits of a N*N=2N bit multiplication result +void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + + genConsumeOperands(treeNode); + + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + emitAttr attr = emitActualTypeSize(treeNode); + unsigned isUnsigned = (treeNode->gtFlags & GTF_UNSIGNED); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + + assert(!varTypeIsFloating(targetType)); + + // op1 and op2 can only be a reg at present, will amend in the future. + assert(!op1->isContained()); + assert(!op2->isContained()); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + if (EA_SIZE(attr) == EA_8BYTE) + { + instruction ins = isUnsigned ? 
INS_mulh_du : INS_mulh_d; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + else + { + assert(EA_SIZE(attr) == EA_4BYTE); + instruction ins = isUnsigned ? INS_mulh_wu : INS_mulh_w; + + emit->emitIns_R_R_R(ins, attr, targetReg, op1->GetRegNum(), op2->GetRegNum()); + } + + genProduceReg(treeNode); +} + +// Generate code for ADD, SUB, MUL, AND, OR and XOR +// This method is expected to have called genConsumeOperands() before calling it. +void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +{ + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); + + assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || + oper == GT_OR || oper == GT_XOR); + + GenTree* op1 = treeNode->gtGetOp1(); + GenTree* op2 = treeNode->gtGetOp2(); + instruction ins = genGetInsForOper(treeNode); + + // The arithmetic node must be sitting in a register (since it's not contained) + assert(targetReg != REG_NA); + + regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + assert(r == targetReg); + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genCodeForLclVar: Produce code for a GT_LCL_VAR node. +// +// Arguments: +// tree - the GT_LCL_VAR node +// +void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) +{ + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + bool isRegCandidate = varDsc->lvIsRegCandidate(); + + // lcl_vars are not defs + assert((tree->gtFlags & GTF_VAR_DEF) == 0); + + // If this is a register candidate that has been spilled, genConsumeReg() will + // reload it at the point of use. Otherwise, if it's not in a register, we load it here. 
+ + if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) + { + var_types targetType = varDsc->GetRegisterType(tree); + //if (tree->gtFlags & GTF_UNSIGNED) + // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. + emitter* emit = GetEmitter(); + + // targetType must be a normal scalar type and not a TYP_STRUCT + assert(targetType != TYP_STRUCT); + instruction ins = ins_Load(targetType); + emitAttr attr = emitTypeSize(targetType); + + emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0); + genProduceReg(tree); + } +} + +//------------------------------------------------------------------------ +// genCodeForStoreLclFld: Produce code for a GT_STORE_LCL_FLD node. +// +// Arguments: +// tree - the GT_STORE_LCL_FLD node +// +void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) +{ + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + // storing of TYP_SIMD12 (i.e. Vector3) field + if (tree->TypeGet() == TYP_SIMD12) + { + genStoreLclTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + // record the offset + unsigned offset = tree->GetLclOffs(); + + // We must have a stack store with GT_STORE_LCL_FLD + noway_assert(targetReg == REG_NA); + + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + // Ensure that lclVar nodes are typed correctly. 
    // Stores to a normalize-on-store local must already be of the local's actual (normalized) type.
    assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));

    GenTree* data = tree->gtOp1;
    genConsumeRegs(data);

    // Pick the register holding the value to store. A contained operand is either
    // the integral constant zero (stored straight from the hard-wired zero register
    // R0) or a contained BITCAST whose source register is reused directly.
    regNumber dataReg = REG_NA;
    if (data->isContainedIntOrIImmed())
    {
        assert(data->IsIntegralConst(0));
        dataReg = REG_R0;
    }
    else if (data->isContained())
    {
        assert(data->OperIs(GT_BITCAST));
        const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
        assert(!bitcastSrc->isContained());
        dataReg = bitcastSrc->GetRegNum();
    }
    else
    {
        assert(!data->isContained());
        dataReg = data->GetRegNum();
    }
    assert(dataReg != REG_NA);

    instruction ins  = ins_StoreFromSrc(dataReg, targetType);
    emitAttr    attr = emitTypeSize(targetType);

    emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);

    genUpdateLife(tree);

    // The field now lives on the stack, not in a register.
    varDsc->SetRegNum(REG_STK);
}

//------------------------------------------------------------------------
// genCodeForStoreLclVar: Produce code for a GT_STORE_LCL_VAR node.
//
// Arguments:
//    lclNode - the GT_STORE_LCL_VAR node
//
void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
{
    GenTree* data = lclNode->gtOp1;

    // var = call, where call returns a multi-reg return value
    // case is handled separately.
    if (data->gtSkipReloadOrCopy()->IsMultiRegNode())
    {
        genMultiRegCallStoreToLocal(lclNode);
        return;
    }

    regNumber targetReg = lclNode->GetRegNum();
    emitter*  emit      = GetEmitter();

    unsigned varNum = lclNode->GetLclNum();
    assert(varNum < compiler->lvaCount);
    LclVarDsc* varDsc     = &(compiler->lvaTable[varNum]);
    var_types  targetType = varDsc->GetRegisterType(lclNode);

    if (lclNode->IsMultiReg())
    {
        // Multi-reg stores are not implemented for LoongArch64 yet; the loop body
        // below (guarded by the assert) is a placeholder sketch only.
        regNumber    operandReg = genConsumeReg(data);
        unsigned int regCount   = varDsc->lvFieldCnt;
        for (unsigned i = 0; i < regCount; ++i)
        {
            assert(!"unimplemented on LOONGARCH yet");
            regNumber varReg = lclNode->GetRegByIndex(i);
            assert(varReg != REG_NA);
            unsigned   fieldLclNum = varDsc->lvFieldLclStart + i;
            LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldLclNum);
            assert(fieldVarDsc->TypeGet() == TYP_FLOAT);
            GetEmitter()->emitIns_R_R_I(INS_st_d, emitTypeSize(TYP_FLOAT), varReg, operandReg, i);
        }
        genProduceReg(lclNode);
    }
    else
    {
#ifdef FEATURE_SIMD
        // storing of TYP_SIMD12 (i.e. Vector3) field
        if (lclNode->TypeGet() == TYP_SIMD12)
        {
            genStoreLclTypeSIMD12(lclNode);
            return;
        }
#endif // FEATURE_SIMD

        genConsumeRegs(data);

        regNumber dataReg = REG_NA;
        if (data->isContained())
        {
            // This is only possible for a zero-init or bitcast.
            const bool zeroInit = data->IsIntegralConst(0);
#if 0
            // TODO: supporting the SIMD on LoongArch64.
            if (zeroInit && varTypeIsSIMD(targetType))
            {
                assert(!varTypeIsSIMD(targetType));
                // assert(targetType == TYP_SIMD8);//TODO:TYP_SIMD16
                assert(targetReg == REG_NA);
                GetEmitter()->emitIns_S_R(INS_st_d, EA_8BYTE, REG_R0, varNum, 0);
                genUpdateLife(lclNode);
                return;
            }
#else
            assert(!varTypeIsSIMD(targetType));
#endif

            if (zeroInit)
            {
                // Zero comes for free from the architectural zero register.
                dataReg = REG_R0;
            }
            else if (data->IsIntegralConst())
            {
                // Materialize the contained constant into R21 (used as a scratch
                // register throughout this file).
                ssize_t imm = data->AsIntConCommon()->IconValue();
                emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm);
                dataReg = REG_R21;
            }
            else
            {
                assert(data->OperIs(GT_BITCAST));
                const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
                assert(!bitcastSrc->isContained());
                dataReg = bitcastSrc->GetRegNum();
            }
        }
        else
        {
            assert(!data->isContained());
            dataReg = data->GetRegNum();
        }
        assert(dataReg != REG_NA);

        if (targetReg == REG_NA) // store into stack based LclVar
        {
            inst_set_SV_var(lclNode);

            instruction ins  = ins_StoreFromSrc(dataReg, targetType);
            emitAttr    attr = emitActualTypeSize(targetType);

            emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);

            genUpdateLife(lclNode);

            varDsc->SetRegNum(REG_STK);
        }
        else // store into register (i.e move into register)
        {
            if (dataReg != targetReg)
            {
                // Assign into targetReg when dataReg (from op1) is not the same register
                inst_Mov(targetType, targetReg, dataReg, true);
            }
            genProduceReg(lclNode);
        }
    }
}

//------------------------------------------------------------------------
// genSimpleReturn: Generates code for simple return statement for loongarch64.
//
// Note: treeNode's and op1's registers are already consumed.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node with non-struct and non-void type
//
// Return Value:
//    None
//
void CodeGen::genSimpleReturn(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTree*  op1        = treeNode->gtGetOp1();
    var_types targetType = treeNode->TypeGet();

    assert(targetType != TYP_STRUCT);
    assert(targetType != TYP_VOID);

    regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? REG_FLOATRET : REG_INTRET;

    bool movRequired = (op1->GetRegNum() != retReg);

    if (!movRequired)
    {
        if (op1->OperGet() == GT_LCL_VAR)
        {
            GenTreeLclVarCommon* lcl            = op1->AsLclVarCommon();
            bool                 isRegCandidate = compiler->lvaTable[lcl->GetLclNum()].lvIsRegCandidate();
            if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
            {
                // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR

                unsigned   lclNum  = lcl->GetLclNum();
                LclVarDsc* varDsc  = &(compiler->lvaTable[lclNum]);
                var_types  op1Type = genActualType(op1->TypeGet());
                var_types  lclType = genActualType(varDsc->TypeGet());

                if (genTypeSize(op1Type) < genTypeSize(lclType))
                {
                    movRequired = true;
                }
            }
        }
    }
    if (movRequired)
    {
        emitAttr attr = emitActualTypeSize(targetType);
        if (varTypeUsesFloatArgReg(treeNode))
        {
            if (attr == EA_4BYTE)
                GetEmitter()->emitIns_R_R(INS_fmov_s, attr, retReg, op1->GetRegNum());
            else
                GetEmitter()->emitIns_R_R(INS_fmov_d, attr, retReg, op1->GetRegNum());
        }
        else
        {
            if (attr == EA_4BYTE) // && op1->OperIs(GT_LCL_VAR) &&
                                  // (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) ==
                                  // EA_8BYTE))
            {
                if (treeNode->gtFlags & GTF_UNSIGNED)
                    // bstrpick.d retReg, src, 31, 0 extracts the low 32 bits (zero-extends).
                    GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0);
                else
                    // slli.w with shift 0 acts as a 32-bit move that sign-extends into 64 bits.
                    GetEmitter()->emitIns_R_R_I(INS_slli_w, attr, retReg, op1->GetRegNum(), 0);
            }
            else
                // ori retReg, src, 0 is the canonical 64-bit register move.
                GetEmitter()->emitIns_R_R_I(INS_ori, attr, retReg, op1->GetRegNum(), 0);
        }
    }
}

/***********************************************************************************************
 *  Generate code for localloc
 */
void CodeGen::genLclHeap(GenTree* tree)
{
    assert(tree->OperGet() == GT_LCLHEAP);
    assert(compiler->compLocallocUsed);

    emitter* emit = GetEmitter();
    GenTree* size = tree->AsOp()->gtOp1;
    noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));

    regNumber            targetReg                = tree->GetRegNum();
    regNumber            regCnt                   = REG_NA;
    regNumber            pspSymReg                = REG_NA;
    var_types            type                     = genActualType(size->gtType);
    emitAttr             easz                     = emitTypeSize(type);
    BasicBlock*          endLabel                 = nullptr; // can optimize for loongarch.
    unsigned             stackAdjustment          = 0;
    const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1;
    target_ssize_t       lastTouchDelta =
        ILLEGAL_LAST_TOUCH_DELTA; // The number of bytes from SP to the last stack address probed.

    noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
    noway_assert(genStackLevel == 0);   // Can't have anything on the stack

    // compute the amount of memory to allocate to properly STACK_ALIGN.
    size_t amount = 0;
    if (size->IsCnsIntOrI())
    {
        // If size is a constant, then it must be contained.
        assert(size->isContained());

        // If amount is zero then return null in targetReg
        amount = size->AsIntCon()->gtIconVal;
        if (amount == 0)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
            goto BAILOUT;
        }

        // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
        amount = AlignUp(amount, STACK_ALIGN);
    }
    else
    {
        // If 0 bail out by returning null in targetReg
        genConsumeRegAndCopy(size, targetReg);
        endLabel = genCreateTempLabel();
        emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0);

        // Compute the size of the block to allocate and perform alignment.
        // If compInitMem=true, we can reuse targetReg as regcnt,
        // since we don't need any internal registers.
        if (compiler->info.compInitMem)
        {
            assert(tree->AvailableTempRegCount() == 0);
            regCnt = targetReg;
        }
        else
        {
            regCnt = tree->ExtractTempReg();
            if (regCnt != targetReg)
            {
                // Register move via "ori regCnt, targetReg, 0".
                emit->emitIns_R_R_I(INS_ori, easz, regCnt, targetReg, 0);
            }
        }

        // Align to STACK_ALIGN
        // regCnt will be the total number of bytes to localloc
        inst_RV_IV(INS_addi_d, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));

        // R21 is used below as the scratch mask register, so regCnt must not alias it.
        assert(regCnt != REG_R21);
        ssize_t imm2 = ~(STACK_ALIGN - 1);
        emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, imm2);
        emit->emitIns_R_R_R(INS_and, emitActualTypeSize(type), regCnt, regCnt, REG_R21);
    }

    // If we have an outgoing arg area then we must adjust the SP by popping off the
    // outgoing arg area. We will restore it right before we return from this method.
    //
    // Localloc returns stack space that aligned to STACK_ALIGN bytes. The following
    // are the cases that need to be handled:
    //   i) Method has out-going arg area.
    //      It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
    //      Therefore, we will pop off the out-going arg area from the stack pointer before allocating the localloc
    //      space.
    //  ii) Method has no out-going arg area.
    //      Nothing to pop off from the stack.
    if (compiler->lvaOutgoingArgSpaceSize > 0)
    {
        unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
        // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
        //                                                                 // aligned
        genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned,
                             rsGetRsvdReg());
        stackAdjustment += outgoingArgSpaceAligned;
    }

    if (size->IsCnsIntOrI())
    {
        // We should reach here only for non-zero, constant size allocations.
        assert(amount > 0);
        ssize_t imm = -16;

        // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes.
        static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
        assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time
        size_t stpCount = amount / (REGSIZE_BYTES * 2);
        if (stpCount <= 4)
        {
            imm = -16 * stpCount;
            emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);

            imm = -imm;
            // Zero the allocated area 16 bytes at a time with paired st.d stores.
            while (stpCount != 0)
            {
                imm -= 8;
                emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm);
                imm -= 8;
                emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm);
                stpCount -= 1;
            }

            lastTouchDelta = 0;

            goto ALLOC_DONE;
        }
        else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
        {
            // Since the size is less than a page, simply adjust the SP value.
            // The SP might already be in the guard page, so we must touch it BEFORE
            // the alloc, not after.

            // ld_w r0, 0(SP)
            emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SP, 0);

            lastTouchDelta = amount;
            imm            = -(ssize_t)amount;
            assert(-8192 <= imm && imm < 0);
            if (-2048 <= imm && imm < 0)
                emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);
            else
            {
                // Immediate doesn't fit addi.d's 12-bit field: scale SP down by 8,
                // add imm/8 (amount is 16-aligned so imm>>3 is exact), scale back up.
                emit->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3);
                emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm >> 3);
                emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3);
            }

            goto ALLOC_DONE;
        }

        // else, "mov regCnt, amount"
        // If compInitMem=true, we can reuse targetReg as regcnt.
        // Since size is a constant, regCnt is not yet initialized.
        assert(regCnt == REG_NA);
        if (compiler->info.compInitMem)
        {
            assert(tree->AvailableTempRegCount() == 0);
            regCnt = targetReg;
        }
        else
        {
            regCnt = tree->ExtractTempReg();
        }
        genSetRegToIcon(regCnt, amount, ((unsigned int)amount == amount) ? TYP_INT : TYP_LONG);
    }

    if (compiler->info.compInitMem)
    {
        // At this point 'regCnt' is set to the total number of bytes to locAlloc.
        // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid
        // by tickling the pages, we will just push 0's on the stack.
        //
        // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
        // and localloc size is a multiple of STACK_ALIGN.

        // Loop:
        ssize_t imm = -16;
        emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);

        emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 8);
        emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, 0);

        // If not done, loop
        // Note that regCnt is the number of bytes to stack allocate.
        // Therefore we need to subtract 16 from regcnt here.
        assert(genIsValidIntReg(regCnt));

        emit->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(type), regCnt, regCnt, -16);

        // The branch displacement is a hard-coded instruction count: back 4
        // instructions (each 4 bytes), i.e. to the addi.d at "Loop:" above.
        // The assert guards against someone adding instructions in between.
        assert(imm == (-4 << 2)); // goto loop.
        emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2));

        lastTouchDelta = 0;
    }
    else
    {
        // At this point 'regCnt' is set to the total number of bytes to localloc.
        //
        // We don't need to zero out the allocated memory. However, we do have
        // to tickle the pages to ensure that SP is always valid and is
        // in sync with the "stack guard page". Note that in the worst
        // case SP is on the last byte of the guard page. Thus you must
        // touch SP-0 first not SP-0x1000.
        //
        // This is similar to the prolog code in CodeGen::genAllocLclFrame().
        //
        // Note that we go through a few hoops so that SP never points to
        // illegal pages at any time during the tickling process.
        //
        //       sltu     R21, SP, regCnt
        //       sub_d    regCnt, SP, regCnt      // regCnt now holds ultimate SP
        //       masknez  regCnt, regCnt, R21     // Overflow, pick lowest possible value
        //
        //       lu12i_w  regTmp, eeGetPageSize()>>12
        //  Loop:
        //       ld_w     r0, 0(SP)               // tickle the page - read from the page
        //       sub_d    R21, SP, regTmp         // decrement SP by eeGetPageSize()
        //       bltu     R21, regCnt, Done
        //       sub_d    SP, SP,regTmp
        //       b        Loop
        //
        //  Done:
        //       mov      SP, regCnt
        //

        // Setup the regTmp
        regNumber regTmp = tree->GetSingleTempReg();

        assert(regCnt != REG_R21);
        emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, REG_R21, REG_SPBASE, regCnt);

        //// dsubu  regCnt, SP, regCnt      // regCnt now holds ultimate SP
        emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);

        // Overflow, set regCnt to lowest possible value
        emit->emitIns_R_R_R(INS_masknez, EA_PTRSIZE, regCnt, regCnt, REG_R21);

        // lu12i.w loads a 20-bit immediate shifted left 12, so the page size must
        // be a multiple of 4KB for this to reproduce it exactly.
        assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12));
        emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12);

        // genDefineTempLabel(loop);

        // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
        emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SPBASE, 0);

        // decrement SP by eeGetPageSize()
        emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_R21, REG_SPBASE, regTmp);

        assert(regTmp != REG_R21);

        // Forward 3 instructions (hard-coded displacement) to "Done:" below.
        ssize_t imm = 3 << 2; // goto done.
        emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_R21, regCnt, imm);

        emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp);

        imm = -4 << 2;
        // Jump to loop and tickle new stack address
        emit->emitIns_I(INS_b, EA_PTRSIZE, imm);

        // Done with stack tickle loop
        // genDefineTempLabel(done);

        // Now just move the final value to SP
        emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0);

        // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
        // we're going to assume the worst and probe.
    }

ALLOC_DONE:
    // Re-adjust SP to allocate outgoing arg area. We must probe this adjustment.
    if (stackAdjustment != 0)
    {
        assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
        assert((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || (lastTouchDelta >= 0));

        const regNumber tmpReg = rsGetRsvdReg();

        if ((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) ||
            (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
             compiler->eeGetPageSize()))
        {
            genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, tmpReg);
        }
        else
        {
            genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, tmpReg);
        }

        // Return the stackalloc'ed address in result register.
        // TargetReg = SP + stackAdjustment.
        //
        genInstrWithConstant(INS_addi_d, EA_PTRSIZE, targetReg, REG_SPBASE, (ssize_t)stackAdjustment, tmpReg);
    }
    else // stackAdjustment == 0
    {
        // Move the final value of SP to targetReg
        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0);
    }

BAILOUT:
    if (endLabel != nullptr)
        genDefineTempLabel(endLabel);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForNegNot: Produce code for a GT_NEG/GT_NOT node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForNegNot(GenTree* tree)
{
    assert(tree->OperIs(GT_NEG, GT_NOT));

    var_types targetType = tree->TypeGet();

    assert(!tree->OperIs(GT_NOT) || !varTypeIsFloating(targetType));

    regNumber   targetReg = tree->GetRegNum();
    instruction ins       = genGetInsForOper(tree);

    // The arithmetic node must be sitting in a register (since it's not contained)
    assert(!tree->isContained());
    // The dst can only be a register.
    assert(targetReg != REG_NA);

    GenTree* operand = tree->gtGetOp1();
    assert(!operand->isContained());
    // The src must be a register.
    regNumber operandReg = genConsumeReg(operand);

    emitAttr attr = emitActualTypeSize(tree);
    GetEmitter()->emitIns_R_R(ins, attr, targetReg, operandReg);

    genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForBswap: Produce code for a GT_BSWAP / GT_BSWAP16 node.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForBswap(GenTree* tree)
{
    // NOTE(review): message string has typos ("unimpleement", "LOONGAARCH64");
    // other stubs in this file use "unimplemented on LOONGARCH yet" — consider
    // making it consistent in a follow-up (the string is runtime text, left as-is here).
    assert(!"unimpleement on LOONGAARCH64 yet");
}

//------------------------------------------------------------------------
// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD:
// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph;
// (2) float/double MOD is morphed into a helper call by front-end.
//
// NOTE(review): despite the header comment, the assert below also accepts
// GT_MOD/GT_UMOD and the code emits mod.w/mod.d for them — confirm which
// statement is authoritative.
//
// Arguments:
//    tree - the node
//
void CodeGen::genCodeForDivMod(GenTreeOp* tree)
{ // can amend further.
    assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV));

    var_types targetType = tree->TypeGet();
    emitter*  emit       = GetEmitter();

    genConsumeOperands(tree);

    if (varTypeIsFloating(targetType))
    {
        // Floating point divide never raises an exception
        assert(varTypeIsFloating(tree->gtOp1));
        assert(varTypeIsFloating(tree->gtOp2));
        assert(tree->gtOper == GT_DIV);
        // genCodeForBinary(tree);
        instruction ins = genGetInsForOper(tree);
        emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(),
                            tree->gtOp2->GetRegNum());
    }
    else // an integer divide operation
    {
        GenTree* divisorOp = tree->gtGetOp2();
        // divisorOp can be immed or reg
        assert(!divisorOp->isContained() || divisorOp->isContainedIntOrIImmed());

        if (divisorOp->IsIntegralConst(0) || divisorOp->GetRegNum() == REG_R0)
        {
            // We unconditionally throw a divide by zero exception
            genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
        }
        else // the divisor is not the constant zero
        {
            GenTree* src1     = tree->gtOp1;
            unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet()));
            emitAttr size     = EA_ATTR(TypeSize);

            assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) &&
                   TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet())));

            // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal;
            regNumber   Reg1       = src1->GetRegNum();
            regNumber   divisorReg = divisorOp->GetRegNum();
            instruction ins;

            // Check divisorOp first as we can always allow it to be a contained immediate
            if (divisorOp->isContainedIntOrIImmed())
            {
                // Materialize the contained immediate into scratch register R21:
                // a single addi.d when it fits in 12 signed bits, else lu12i.w + ori.
                ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal);
                divisorReg       = REG_R21;
                if ((-2048 <= intConst) && (intConst <= 0x7ff))
                    emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst);
                else
                {
                    emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12);
                    emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff);
                }
            }
            // Only for commutative operations do we check src1 and allow it to be a contained immediate
            else if (tree->OperIsCommutative())
            {
                // src1 can be immed or reg
                assert(!src1->isContained() || src1->isContainedIntOrIImmed());

                // Check src1 and allow it to be a contained immediate
                if (src1->isContainedIntOrIImmed())
                {
                    assert(!divisorOp->isContainedIntOrIImmed());
                    ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal);
                    Reg1             = REG_R21;
                    if ((-2048 <= intConst) && (intConst <= 0x7ff))
                        emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst);
                    else
                    {
                        emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12);
                        emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff);
                    }
                }
            }
            else
            {
                // src1 can only be a reg
                assert(!src1->isContained());
            }

            // Generate the required runtime checks for GT_DIV or GT_UDIV
            if (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD)
            {
                // Two possible exceptions:
                //     (AnyVal /  0) => DivideByZeroException
                //     (MinInt / -1) => ArithmeticException
                //
                bool checkDividend = true;

                // Do we have an immediate for the 'divisorOp'?
                //
                if (divisorOp->IsCnsIntOrI())
                {
                    ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal;
                    // assert(intConstValue != 0); // already checked above by IsIntegralConst(0)
                    if (intConstValue != -1)
                    {
                        checkDividend = false; // We statically know that the dividend is not -1
                    }
                }
                else // insert check for division by zero
                {
                    // Check if the divisor is zero throw a DivideByZeroException
                    genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
                }

                if (checkDividend)
                {
                    // Check if the divisor is not -1 branch to 'sdivLabel'
                    emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1);
                    BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for loongarch64.
                    emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_R21, divisorReg);

                    // If control flow continues past here the 'divisorReg' is known to be -1
                    regNumber dividendReg = tree->gtGetOp1()->GetRegNum();
                    // At this point the divisor is known to be -1
                    //
                    // Whether dividendReg is MinInt or not: dividend+dividend overflows
                    // to zero exactly when the dividend is MinInt, so a beq on the sum
                    // detects the MinInt / -1 case.
                    //

                    emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0);

                    emit->emitIns_R_R_R(size == EA_4BYTE ? INS_add_w : INS_add_d, size, REG_R21, dividendReg,
                                        dividendReg);
                    genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_R21);
                    genDefineTempLabel(sdivLabel);
                }

                // Generate the sdiv instruction
                if (size == EA_4BYTE)
                {
                    if (tree->OperGet() == GT_DIV)
                    {
                        ins = INS_div_w;
                    }
                    else
                        ins = INS_mod_w;
                }
                else
                {
                    if (tree->OperGet() == GT_DIV)
                    {
                        ins = INS_div_d;
                    }
                    else
                        ins = INS_mod_d;
                }

                emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg);
            }
            else // if (tree->gtOper == GT_UDIV) GT_UMOD
            {
                // Only one possible exception
                //     (AnyVal /  0) => DivideByZeroException
                //
                // Note that division by the constant 0 was already checked for above by the
                // op2->IsIntegralConst(0) check
                //

                if (!divisorOp->IsCnsIntOrI())
                {
                    // divisorOp is not a constant, so it could be zero
                    //
                    genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
                }

                if (size == EA_4BYTE)
                {
                    if (tree->OperGet() == GT_UDIV)
                    {
                        ins = INS_div_wu;
                    }
                    else
                        ins = INS_mod_wu;

                    // TODO: temp workaround, should amend for optimize.
                    // slli.w with shift 0 re-sign-extends the 32-bit operands before
                    // the unsigned 32-bit divide.
                    emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0);
                    emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0);
                }
                else
                {
                    if (tree->OperGet() == GT_UDIV)
                    {
                        ins = INS_div_du;
                    }
                    else
                        ins = INS_mod_du;
                }

                emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg);
            }
        }
    }
    genProduceReg(tree);
}

// Generate code for InitBlk by performing a loop unroll
// Preconditions:
//   a) Both the size and fill byte value are integer constants.
//   b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
{
    assert(node->OperIs(GT_STORE_BLK));

    unsigned  dstLclNum      = BAD_VAR_NUM;
    regNumber dstAddrBaseReg = REG_NA;
    int       dstOffset      = 0;
    GenTree*  dstAddr        = node->Addr();

    // Resolve the destination either to a (base register, offset) pair or to a
    // stack local (lclNum, offset) so the stores below can pick the right form.
    if (!dstAddr->isContained())
    {
        dstAddrBaseReg = genConsumeReg(dstAddr);
    }
    else if (dstAddr->OperIsAddrMode())
    {
        assert(!dstAddr->AsAddrMode()->HasIndex());

        dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base());
        dstOffset      = dstAddr->AsAddrMode()->Offset();
    }
    else
    {
        assert(dstAddr->OperIsLocalAddr());
        dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
        dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs();
    }

    regNumber srcReg;
    GenTree*  src = node->Data();

    if (src->OperIs(GT_INIT_VAL))
    {
        assert(src->isContained());
        src = src->gtGetOp1();
    }

    if (!src->isContained())
    {
        srcReg = genConsumeReg(src);
    }
    else
    {
        // A contained fill value must be zero; use the hard-wired zero register.
        assert(src->IsIntegralConst(0));
        srcReg = REG_R0;
    }

    if (node->IsVolatile())
    {
        instGen_MemoryBarrier();
    }

    emitter* emit = GetEmitter();
    unsigned size = node->GetLayout()->GetSize();

    assert(size <= INT32_MAX);
    assert(dstOffset < INT32_MAX - static_cast<int>(size));

    // First pass: fill 16 bytes per iteration with paired st.d stores.
    for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize)
    {
        if (dstLclNum != BAD_VAR_NUM)
        {
            emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset);
            emit->emitIns_S_R(INS_st_d, EA_8BYTE, srcReg, dstLclNum, dstOffset + 8);
        }
        else
        {
            emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset);
            emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, srcReg, dstAddrBaseReg, dstOffset + 8);
        }
    }

    // Second pass: mop up the remaining 1..15 bytes with progressively narrower stores.
    for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, dstOffset += regSize)
    {
        while (regSize > size)
        {
            regSize /= 2;
        }

        instruction storeIns;
        emitAttr    attr;

        switch (regSize)
        {
            case 1:
                storeIns = INS_st_b;
                attr     = EA_4BYTE;
                break;
            case 2:
                storeIns = INS_st_h;
                attr     = EA_4BYTE;
                break;
            case 4:
                storeIns = INS_st_w;
                attr     = EA_ATTR(regSize);
                break;
            case 8:
                storeIns = INS_st_d;
                attr     = EA_ATTR(regSize);
                break;
            default:
                unreached();
        }

        if (dstLclNum != BAD_VAR_NUM)
        {
            emit->emitIns_S_R(storeIns, attr, srcReg, dstLclNum, dstOffset);
        }
        else
        {
            emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset);
        }
    }
}

// Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
// For this case we'll generate a sequence of loads/stores in the case of struct
// slots that don't contain GC pointers.  The generated code will look like:
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
//
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
// who happens to use the same registers as the previous call to maintain
// the same register requirements and register killsets:
// bl CORINFO_HELP_ASSIGN_BYREF
//
// So finally an example would look like this:
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
// bl CORINFO_HELP_ASSIGN_BYREF
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
// bl CORINFO_HELP_ASSIGN_BYREF
// ld tempReg, 8(A5)
// sd tempReg, 8(A6)
void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
    GenTree*  dstAddr       = cpObjNode->Addr();
    GenTree*  source        = cpObjNode->Data();
    var_types srcAddrType   = TYP_BYREF;
    bool      sourceIsLocal = false;

    assert(source->isContained());
    if (source->gtOper == GT_IND)
    {
        GenTree* srcAddr = source->gtGetOp1();
        assert(!srcAddr->isContained());
        srcAddrType = srcAddr->TypeGet();
    }
    else
    {
        noway_assert(source->IsLocal());
        sourceIsLocal = true;
    }

    bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr();

#ifdef DEBUG
    assert(!dstAddr->isContained());

    // This GenTree node has data about GC pointers, this means we're dealing
    // with CpObj.
    assert(cpObjNode->GetLayout()->HasGCPtr());
#endif // DEBUG

    // Consume the operands and get them into the right registers.
    // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
    genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
    gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
    gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());

    ClassLayout* layout = cpObjNode->GetLayout();
    unsigned     slots  = layout->GetSlotCount();

    // Temp register(s) used to perform the sequence of loads and stores.
    regNumber tmpReg  = cpObjNode->ExtractTempReg();
    regNumber tmpReg2 = REG_NA;

    assert(genIsValidIntReg(tmpReg));
    assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
    assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);

    if (slots > 1)
    {
        tmpReg2 = cpObjNode->GetSingleTempReg();
        assert(tmpReg2 != tmpReg);
        assert(genIsValidIntReg(tmpReg2));
        assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
        assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
    }

    if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
    {
        // issue a full memory barrier before a volatile CpObj operation
        instGen_MemoryBarrier();
    }

    emitter* emit = GetEmitter();

    emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType);
    emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

    // If we can prove it's on the stack we don't need to use the write barrier.
    if (dstOnStack)
    {
        unsigned i = 0;
        // Check if two or more remaining slots and use two ld/sd sequence
        while (i < slots - 1)
        {
            emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0));
            emitAttr attr1 = emitTypeSize(layout->GetGCPtrType(i + 1));

            emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
            emit->emitIns_R_R_I(INS_ld_d, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF,
                                2 * TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
            emit->emitIns_R_R_I(INS_st_d, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF,
                                2 * TARGET_POINTER_SIZE);
            i += 2;
        }

        // Use a ld/sd sequence for the last remainder
        if (i < slots)
        {
            emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0));

            emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
            emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF,
                                TARGET_POINTER_SIZE);
            emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
            emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF,
                                TARGET_POINTER_SIZE);
        }
    }
    else
    {
        unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

        unsigned i = 0;
        while (i < slots)
        {
            if (!layout->IsGCPtr(i))
            {
                // Check if the next slot's type is also TYP_GC_NONE and use two ld/sd
                if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
                {
                    emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
                    emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF,
                                        REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
                    emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF,
                                        REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE);
                    ++i; // extra increment of i, since we are copying two items
                }
                else
                {
                    emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0);
                    emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF,
                                        REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE);
                    emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0);
                    emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF,
                                        REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE);
                }
            }
            else
            {
                // In the case of a GC-Pointer we'll call the ByRef write barrier helper
                genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
                gcPtrCount--;
            }
            ++i;
        }
        assert(gcPtrCount == 0);
    }

    if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
    {
        // issue a INS_BARRIER_RMB after a volatile CpObj operation
        //// TODO: there is only BARRIER_FULL for LOONGARCH64.
        instGen_MemoryBarrier(BARRIER_FULL);
    }

    // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
    // While we normally update GC info prior to the last instruction that uses them,
    // these actually live into the helper call.
    gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
}

// generate code do a switch statement based on a table of ip-relative offsets
void CodeGen::genTableBasedSwitch(GenTree* treeNode)
{
    genConsumeOperands(treeNode->AsOp());
    regNumber idxReg  = treeNode->AsOp()->gtOp1->GetRegNum();
    regNumber baseReg = treeNode->AsOp()->gtOp2->GetRegNum();

    regNumber tmpReg = treeNode->GetSingleTempReg();

    // load the ip-relative offset (which is relative to start of fgFirstBB)
    // R21 holds idx*4 (each table entry is a 4-byte offset).
    GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, idxReg, 2);
    GetEmitter()->emitIns_R_R_R(INS_add_d, EA_8BYTE, baseReg, baseReg, REG_R21);
    GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0);

    // add it to the absolute address of fgFirstBB
    // compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64.
    GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
    GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg);

    // jr baseReg (jirl with rd = R0 is an indirect jump without link)
    GetEmitter()->emitIns_R_R_I(INS_jirl, emitActualTypeSize(TYP_I_IMPL), REG_R0, baseReg, 0);
}

// emits the table and an instruction to get the address of the first element
void CodeGen::genJumpTable(GenTree* treeNode)
{
    noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
    assert(treeNode->OperGet() == GT_JMPTABLE);

    unsigned     jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
    BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
    unsigned     jmpTabOffs;
    unsigned     jmpTabBase;

    jmpTabBase = GetEmitter()->emitBBTableDataGenBeg(jumpCount, true);

    jmpTabOffs = 0;

    JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", compiler->compMethodID, jmpTabBase);

    // Emit one data entry per switch target.
    for (unsigned i = 0; i < jumpCount; i++)
    {
        BasicBlock* target = *jumpTable++;
        noway_assert(target->bbFlags & BBF_HAS_LABEL);

        JITDUMP(" DD L_M%03u_" FMT_BB "\n", compiler->compMethodID, target->bbNum);

        GetEmitter()->emitDataGenData(i, target);
    };

    GetEmitter()->emitDataGenEnd();

    // Access to inline data is 'abstracted' by a special type of static member
    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
    // to constant data, not a real static field.
    GetEmitter()->emitIns_R_C(INS_bl, emitActualTypeSize(TYP_I_IMPL), treeNode->GetRegNum(), REG_NA,
                              compiler->eeFindJitDataOffs(jmpTabBase), 0);
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
//
// Arguments:
//    treeNode - the GT_XADD/XCHG node
//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
    assert(!"unimplemented on LOONGARCH yet");
}

//------------------------------------------------------------------------
// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
//
// Arguments:
//    tree - the GT_CMPXCHG node
//
void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
{
    assert(!"unimplemented on LOONGARCH yet");
}

//------------------------------------------------------------------------
// isImmed: Returns true if either operand of 'treeNode' is a contained
// integral immediate (op2 is only checked for binary operators).
//
static inline bool isImmed(GenTree* treeNode)
{
    if (treeNode->gtGetOp1()->isContainedIntOrIImmed())
    {
        return true;
    }
    else if (treeNode->OperIsBinary())
    {
        if (treeNode->gtGetOp2()->isContainedIntOrIImmed())
            return true;
    }

    return false;
}

instruction CodeGen::genGetInsForOper(GenTree* treeNode)
{
    var_types  type = treeNode->TypeGet();
    genTreeOps oper = treeNode->OperGet();
    GenTree*   op1  = treeNode->gtGetOp1();
    GenTree*   op2;
    emitAttr   attr  = emitActualTypeSize(treeNode);
    bool       isImm = false;

    instruction ins = INS_break;

    if (varTypeIsFloating(type))
    {
        switch (oper)
        {
            case GT_ADD:
                if (attr == EA_4BYTE)
                    ins = INS_fadd_s;
                else
                    ins = INS_fadd_d;
                break;
            case GT_SUB:
                if (attr == EA_4BYTE)
                    ins = INS_fsub_s;
                else
                    ins = INS_fsub_d;
                break;
            case GT_MUL:
                if (attr == EA_4BYTE)
                    ins = INS_fmul_s;
                else
                    ins = INS_fmul_d;
                break;
            case GT_DIV:
                if (attr == EA_4BYTE)
ins = INS_fdiv_s; + else + ins = INS_fdiv_d; + break; + case GT_NEG: + if (attr == EA_4BYTE) + ins = INS_fneg_s; + else + ins = INS_fneg_d; + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - float"); + unreached(); + break; + } + } + else + { + switch (oper) + { + case GT_ADD: + isImm = isImmed(treeNode); + if (isImm) + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_addi_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_addi_w; + } + } + else + { + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_add_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_add_w; + } + } + break; + + case GT_SUB: + isImm = isImmed(treeNode); + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_sub_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_sub_w; + } + break; + + case GT_MOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_w; + } + break; + + case GT_DIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_d; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_w; + } + break; + + case GT_UMOD: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_mod_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_mod_wu; + } + break; + + case GT_UDIV: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + ins = INS_div_du; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_div_wu; + } + break; + + case GT_MUL: + if ((attr == EA_8BYTE) || (attr == EA_BYREF)) + { + //if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + // ins = INS_mul_d; + //else + ins = INS_mul_d; + } + else + { + if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) + ins = INS_mulw_d_wu; + else + ins = INS_mul_w; + } + break; + + case GT_NEG: + if (attr == EA_8BYTE) + { + ins = INS_dneg; + } + else + { + assert(attr == EA_4BYTE); + ins = INS_neg; + } + break; + + case GT_NOT: + ins = INS_not; + break; + + case GT_AND: + isImm = 
isImmed(treeNode); + if (isImm) + { + ins = INS_andi; + } + else + { + ins = INS_and; + } + break; + + case GT_OR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_ori; + } + else + { + ins = INS_or; + } + break; + + case GT_LSH: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_slli_w; + else + ins = INS_slli_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_sll_w; + else + ins = INS_sll_d; + } + break; + + case GT_RSZ: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_srli_w; + else + ins = INS_srli_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_srl_w; + else + ins = INS_srl_d; + } + break; + + case GT_RSH: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_srai_w; + else + ins = INS_srai_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_sra_w; + else + ins = INS_sra_d; + } + break; + + case GT_ROR: + isImm = isImmed(treeNode); + if (isImm) + { + //it's better to check sa. + if (attr == EA_4BYTE) + ins = INS_rotri_w; + else + ins = INS_rotri_d; + } + else + { + if (attr == EA_4BYTE) + ins = INS_rotr_w; + else + ins = INS_rotr_d; + } + break; + + case GT_XOR: + isImm = isImmed(treeNode); + if (isImm) + { + ins = INS_xori; + } + else + { + ins = INS_xor; + } + break; + + default: + NYI("Unhandled oper in genGetInsForOper() - integer"); + unreached(); + break; + } + } + return ins; +} + +//------------------------------------------------------------------------ +// genCodeForReturnTrap: Produce code for a GT_RETURNTRAP node. 
//
// Arguments:
//    tree - the GT_RETURNTRAP node
//
void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)
{
    assert(tree->OperGet() == GT_RETURNTRAP);

    // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
    // based on the contents of 'data'

    GenTree* data = tree->gtOp1;
    genConsumeRegs(data);

    // Skip the helper call when 'data' is zero.
    BasicBlock* skipLabel = genCreateTempLabel();
    GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0);

    void* pAddr = nullptr;
    void* addr  = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr);
    emitter::EmitCallType callType;
    regNumber             callTarget;

    if (addr == nullptr)
    {
        // No direct address: load the helper's address indirection cell into
        // the default helper-call target register and call through it.
        callType   = emitter::EC_INDIR_R;
        callTarget = REG_DEFAULT_HELPER_CALL_TARGET;

        //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
        if (compiler->opts.compReloc)
        {
            GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
        }
        else
        {
            // Materialize pAddr piecewise (lu12i_w/lu32i_d) and load through it
            // with ldptr_d, whose immediate is the low 12 bits scaled by 4.
            //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr);
            //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, );
            GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12);
            GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32);
            GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2);
        }
        regSet.verifyRegUsed(callTarget);
        //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0);
    }
    else
    {
        callType   = emitter::EC_FUNC_TOKEN;
        callTarget = REG_NA;
    }

    ////TODO: can optimize further !!!
    GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC),
                               INDEBUG_LDISASM_COMMA(nullptr) addr, 0,
                               EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */
                               callTarget,                           /* ireg */
                               REG_NA, 0, 0,                         /* xreg, xmul, disp */
                               false                                 /* isJump */
                               );

    genDefineTempLabel(skipLabel);

    regMaskTP killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
    regSet.verifyRegistersUsed(killMask);
}

//------------------------------------------------------------------------
// genCodeForStoreInd: Produce code for a GT_STOREIND node.
//
// Arguments:
//    tree - the GT_STOREIND node
//
void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
{
#ifdef FEATURE_SIMD
    // Storing Vector3 of size 12 bytes through indirection
    if (tree->TypeGet() == TYP_SIMD12)
    {
        genStoreIndTypeSIMD12(tree);
        return;
    }
#endif // FEATURE_SIMD

    GenTree* data = tree->Data();
    GenTree* addr = tree->Addr();

    GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data);
    if (writeBarrierForm != GCInfo::WBF_NoBarrier)
    {
        // data and addr must be in registers.
        // Consume both registers so that any copies of interfering
        // registers are taken care of.
        genConsumeOperands(tree);

        // At this point, we should not have any interference.
        // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
        // as that is where 'addr' must go.
        noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST_BYREF);

        // 'addr' goes into the write-barrier destination register (REG_WRITE_BARRIER_DST)
        genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST);

        // 'data' goes into the write-barrier source register (REG_WRITE_BARRIER_SRC)
        genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC);

        genGCWriteBarrier(tree, writeBarrierForm);
    }
    else // A normal store, not a WriteBarrier store
    {
        // We must consume the operands in the proper execution order,
        // so that liveness is updated appropriately.
        genConsumeAddress(addr);

        if (!data->isContained())
        {
            genConsumeRegs(data);
        }

        regNumber dataReg;
        if (data->isContainedIntOrIImmed())
        {
            // Only a contained zero is allowed; store from the hard-wired zero register.
            assert(data->IsIntegralConst(0));
            dataReg = REG_R0;
        }
        else // data is not contained, so evaluate it into a register
        {
            assert(!data->isContained());
            dataReg = data->GetRegNum();
        }

        var_types   type = tree->TypeGet();
        instruction ins  = ins_Store(type);

        if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
        {
            // issue a full memory barrier before a volatile StInd
            instGen_MemoryBarrier();
        }

        GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree);
    }
}

//------------------------------------------------------------------------
// genCodeForSwap: Produce code for a GT_SWAP node.
//
// Arguments:
//    tree - the GT_SWAP node
//
void CodeGen::genCodeForSwap(GenTreeOp* tree)
{
    assert(!"unimplemented on LOONGARCH yet");
}

//------------------------------------------------------------------------
// genIntToFloatCast: Generate code to cast an int/long to float/double
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    SrcType= int32/uint32/int64/uint64 and DstType=float/double.
//
void CodeGen::genIntToFloatCast(GenTree* treeNode)
{
    // int type --> float/double conversions are always non-overflow ones
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->GetRegNum();
    assert(genIsValidFloatReg(targetReg));

    GenTree* op1 = treeNode->AsOp()->gtOp1;
    assert(!op1->isContained());                // Cannot be contained
    assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg.

    var_types dstType = treeNode->CastToType();
    var_types srcType = genActualType(op1->TypeGet());
    assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));

    emitter *emit = GetEmitter();
    emitAttr attr = emitActualTypeSize(dstType);

    // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE
    emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
    noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));

    bool        IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED;
    instruction ins        = INS_invalid;

    genConsumeOperands(treeNode->AsOp());

    if (IsUnsigned)
    {//should amend.
        // LOONGARCH64 ffint_* only converts signed sources, so pre-massage
        // unsigned inputs. The original op1 value is parked in REG_SCRATCH_FLT
        // and restored after the conversion.
        emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1

        if (srcSize == EA_8BYTE)
        {
            // If op1 < 0 when viewed as signed (i.e. top bit set as uint64),
            // halve it keeping the low bit sticky (srli+or) so the signed
            // conversion below stays in range; the result is doubled afterwards.
            ssize_t imm = 4 << 2;
            emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm);

            emit->emitIns_R_R_I(INS_andi, EA_8BYTE, REG_R21, op1->GetRegNum(), 1);
            emit->emitIns_R_R_I(INS_srli_d, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), 1);
            emit->emitIns_R_R_R(INS_or, EA_8BYTE, op1->GetRegNum(), op1->GetRegNum(), REG_R21);
        }
        else
        {
            // uint32 source: zero the upper 32 bits and convert as a
            // (now always non-negative) 64-bit value.
            srcSize = EA_8BYTE;
            emit->emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, op1->GetRegNum(), REG_R0, 63, 32);
        }
    }

    ins = srcSize == EA_8BYTE ? INS_movgr2fr_d : INS_movgr2fr_w;
    emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), op1->GetRegNum());

    if (dstType == TYP_DOUBLE)
    {
        if (srcSize == EA_4BYTE)
        {
            ins = INS_ffint_d_w;
        }
        else
        {
            assert(srcSize == EA_8BYTE);
            ins = INS_ffint_d_l;
        }
    }
    else
    {
        assert(dstType == TYP_FLOAT);
        if (srcSize == EA_4BYTE)
        {
            ins = INS_ffint_s_w;
        }
        else
        {
            assert(srcSize == EA_8BYTE);
            ins = INS_ffint_s_l;
        }
    }

    emit->emitIns_R_R(ins, attr, treeNode->GetRegNum(), treeNode->GetRegNum());

    if (IsUnsigned)
    {
        srcSize = EA_ATTR(genTypeSize(srcType));
        emit->emitIns_R_R(INS_movfr2gr_d, attr, op1->GetRegNum(), REG_SCRATCH_FLT); // recover op1

        if (srcSize == EA_8BYTE)
        {
            // If the input had been halved above, double the converted result
            // (fadd of the value with itself) to compensate.
            ssize_t imm = 3 << 2;
            emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm);

            emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, treeNode->GetRegNum());
            emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), REG_SCRATCH_FLT, treeNode->GetRegNum());
        }
    }

    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genFloatToIntCast: Generate code to cast float/double to int/long
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    SrcType=float/double and DstType= int32/uint32/int64/uint64
//
void CodeGen::genFloatToIntCast(GenTree* treeNode)
{
    // we don't expect to see overflow detecting float/double --> int type conversions here
    // as they should have been converted into helper calls by front-end.
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->GetRegNum();
    assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.

    GenTree* op1 = treeNode->AsOp()->gtOp1;
    assert(!op1->isContained());                  // Cannot be contained
    assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg.

    var_types dstType = treeNode->CastToType();
    var_types srcType = op1->TypeGet();
    assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));

    // We should never see a dstType whose size is neither EA_4BYTE or EA_8BYTE
    // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
    // we expect the front-end or lowering phase to have generated two levels of cast.
    //
    emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
    noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));

    instruction ins1       = INS_invalid; // truncating float->int convert
    instruction ins2       = INS_invalid; // FPR->GPR move of the result
    bool        IsUnsigned = varTypeIsUnsigned(dstType);

    regNumber tmpReg = REG_SCRATCH_FLT;
    assert(tmpReg != op1->GetRegNum());

    if (srcType == TYP_DOUBLE)
    {
        if (dstSize == EA_4BYTE)
        {
            ins1 = INS_ftintrz_w_d;
            ins2 = INS_movfr2gr_s;
        }
        else
        {
            assert(dstSize == EA_8BYTE);
            ins1 = INS_ftintrz_l_d;
            ins2 = INS_movfr2gr_d;
        }
    }
    else
    {
        assert(srcType == TYP_FLOAT);
        if (dstSize == EA_4BYTE)
        {
            ins1 = INS_ftintrz_w_s;
            ins2 = INS_movfr2gr_s;
        }
        else
        {
            assert(dstSize == EA_8BYTE);
            ins1 = INS_ftintrz_l_s;
            ins2 = INS_movfr2gr_d;
        }
    }

    genConsumeOperands(treeNode->AsOp());

    if (IsUnsigned)
    {
        // 'imm' encodes the float/double bit pattern of 2^31 or 2^63 (the
        // smallest value that does not fit the signed destination); values at
        // or above it are converted via (x - 2^N) and the sign bit is OR'd
        // back in at the end.
        ssize_t imm = 0;

        if (srcType == TYP_DOUBLE)
        {
            if (dstSize == EA_4BYTE)
            {
                imm = 0x41e00;
            }
            else
            {
                imm = 0x43e00;
            }
        }
        else
        {
            assert(srcType == TYP_FLOAT);
            if (dstSize == EA_4BYTE)
            {
                imm = 0x4f000;
            }
            else
            {
                imm = 0x5f000;
            }
        }

        //{
        //    GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, tmpReg, REG_R0);

        //    GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2);
        //    GetEmitter()->emitIns_I_I(INS_bc1f, EA_PTRSIZE, 2, 4 << 2);

        //    GetEmitter()->emitIns_R_R_I(INS_ori*/, EA_PTRSIZE, treeNode->GetRegNum(), REG_R0, 0);
        //    GetEmitter()->emitIns_I(INS_b, EA_PTRSIZE, srcType == TYP_DOUBLE ? 14 << 2 : 13 << 2);
        //}

        if (srcType == TYP_DOUBLE)
            GetEmitter()->emitIns_R_R_I(INS_lu52i_d, EA_8BYTE, REG_R21, REG_R0, imm >> 8);
        else
            GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm);

        GetEmitter()->emitIns_R_R(srcType == TYP_DOUBLE ? INS_movgr2fr_d : INS_movgr2fr_w, EA_8BYTE, tmpReg, REG_R21);

        // cc2 := (op1 < threshold); if so the plain signed conversion suffices.
        GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2);

        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 0);
        GetEmitter()->emitIns_I_I(INS_bcnez, EA_PTRSIZE, 2, 4 << 2);

        // Out-of-signed-range: convert (op1 - threshold) and prepare the sign
        // bit (1 << 63 or 1 << 31) in R21 to OR back into the result.
        GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), tmpReg);

        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 1);
        GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, dstSize == EA_8BYTE ? 63 : 31);

        GetEmitter()->emitIns_R_R_R_I(INS_fsel, EA_PTRSIZE, tmpReg, tmpReg, op1->GetRegNum(), 2);

        GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, tmpReg);
        GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg);

        GetEmitter()->emitIns_R_R_R(INS_or, dstSize, treeNode->GetRegNum(), REG_R21, treeNode->GetRegNum());
    }
    else
    {
        GetEmitter()->emitIns_R_R(ins1, dstSize, tmpReg, op1->GetRegNum());
        GetEmitter()->emitIns_R_R(ins2, dstSize, treeNode->GetRegNum(), tmpReg);
    }

    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCkfinite: Generate code for ckfinite opcode.
//
// Arguments:
//    treeNode - The GT_CKFINITE node
//
// Return Value:
//    None.
//
// Assumptions:
//    GT_CKFINITE node has reserved an internal register.
//
void CodeGen::genCkfinite(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_CKFINITE);

    GenTree*  op1        = treeNode->AsOp()->gtOp1;
    var_types targetType = treeNode->TypeGet();
    ssize_t   expMask    = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent.
    ssize_t   size       = (targetType == TYP_FLOAT) ? 8 : 11;       // Bit size to extract exponent.
    ssize_t   pos        = (targetType == TYP_FLOAT) ? 23 : 52;      // Bit pos of exponent.

    emitter* emit = GetEmitter();
    emitAttr attr = emitActualTypeSize(treeNode);

    // Extract exponent into a register.
    regNumber intReg = treeNode->GetSingleTempReg();
    regNumber fpReg  = genConsumeReg(op1);

    emit->emitIns_R_R(attr == EA_8BYTE ? INS_movfr2gr_d : INS_movfr2gr_s, attr, intReg, fpReg);

    // Mask of exponent with all 1's and check if the exponent is all 1's
    // (all-ones exponent means Inf/NaN: after xori with expMask the register
    // is zero exactly in that case, which triggers the throw below).
    instruction ins = (targetType == TYP_FLOAT) ? INS_bstrpick_w : INS_bstrpick_d;
    emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos+size-1, pos);
    emit->emitIns_R_R_I(INS_xori, attr, intReg, intReg, expMask);

    genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, intReg);

    // if it is a finite value copy it to targetReg
    if (treeNode->GetRegNum() != fpReg)
    {
        emit->emitIns_R_R(ins_Copy(targetType), attr, treeNode->GetRegNum(), fpReg);
    }
    genProduceReg(treeNode);
}

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_EQ/GT_NE/GT_LT/GT_LE/GT_GE/GT_GT node.
+// +// Arguments: +// tree - the node +// +void CodeGen::genCodeForCompare(GenTreeOp* jtree) +{ + emitter* emit = GetEmitter(); + + GenTreeOp* tree = nullptr; + regNumber targetReg; + if (jtree->OperIs(GT_JTRUE)) + { + tree = jtree->gtGetOp1()->AsOp(); + targetReg = REG_RA; + assert(tree->GetRegNum() == REG_NA); + + jtree->gtOp2 = (GenTree*)REG_RA;//targetReg + jtree->SetRegNum((regNumber)INS_bnez); + } + else + { + tree = jtree; + targetReg = tree->GetRegNum(); + } + assert(targetReg != REG_NA); + + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + + if (varTypeIsFloating(op1Type)) + { + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + if(IsUnordered) + { + if(tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? 
INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + } + else + { + if(tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + else if(tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + } + + emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); + emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1/*cc*/); + } + else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) + { + ssize_t imm1 = op1->AsIntCon()->gtIconVal; + ssize_t imm2 = op2->AsIntCon()->gtIconVal; + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + instruction ins = INS_beqz; + + switch (cmpSize) + { + case EA_4BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + case EA_8BYTE: + break; + case EA_1BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpCompare."); + } + + switch (tree->OperGet()) + { + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && 
((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; + } + + assert(ins != INS_invalid); + jtree->gtOp2 = (GenTree*)REG_SP; + jtree->SetRegNum((regNumber)ins); + } + else + { + //TODO:can optimize further. + if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + assert(!op1->isContainedIntOrIImmed()); + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + { + switch (cmpSize) + { + case EA_4BYTE: + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + } + + if (tree->OperIs(GT_LT)) + { + if (!IsUnsigned && isValidSimm12(imm)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && isValidUimm11(imm)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else { + 
emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_LE)) + { + if (!IsUnsigned && isValidSimm12(imm + 1)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else if (IsUnsigned && isValidUimm11(imm + 1)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + } + else if (tree->OperIs(GT_GT)) + { + if (!IsUnsigned && isValidSimm12(imm + 1)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else if (IsUnsigned && isValidUimm11(imm + 1)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); + } + } + else if (tree->OperIs(GT_GE)) + { if (!IsUnsigned && isValidSimm12(imm)) { + emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); + } + else if (IsUnsigned && isValidUimm11(imm)) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); + } + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + if (!imm) { + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); + } + else if (isValidUimm12(imm)) { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + } + else if (tree->OperIs(GT_EQ)) + { + if (!imm) { + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); + } + else if (isValidUimm12(imm)) { + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + else { + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + } + + genProduceReg(tree); + } + else + { + regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp2 = tree->ExtractTempReg(); + regNumber regOp2 = op2->GetRegNum(); + if (cmpSize == EA_4BYTE) + { + regOp1 = tmpRegOp1; + regOp2 = tmpRegOp2; + if (IsUnsigned) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp1, op1->GetRegNum(), 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp2, op2->GetRegNum(), 31, 0); + } + else + { + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, op1->GetRegNum(), 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, op2->GetRegNum(), 0); + } + } + + if (tree->OperIs(GT_LT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + } + else if (tree->OperIs(GT_LE)) + { + emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_GT)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp2, regOp1); + } + else if (tree->OperIs(GT_GE)) + { + emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_8BYTE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, targetReg, 1); + } + else if (tree->OperIs(GT_NE)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); + } + else if (tree->OperIs(GT_EQ)) + { + emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); + emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); + } + + genProduceReg(tree); + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpTrue: Generate code for a GT_JTRUE node. +// +// Arguments: +// jtrue - The node +// +void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) +{ + //assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. + ////assert(jtrue->OperIs(GT_JTRUE)); + + emitter* emit = GetEmitter(); + + GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? 
jtrue->gtGetOp1()->AsOp() : jtrue; + regNumber targetReg = tree->GetRegNum(); + instruction ins = INS_invalid; + + if (jtrue->OperIs(GT_JTRUE) && jtrue->gtOp2) + { + emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, (int)(int64_t)jtrue->gtOp2);//5-bits; + jtrue->SetRegNum(REG_NA); + jtrue->gtOp2 = nullptr; + return; + } + else + { + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; + + var_types op1Type = genActualType(op1->TypeGet()); + var_types op2Type = genActualType(op2->TypeGet()); + + bool IsEq = tree == jtrue->gtPrev; + + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + + genConsumeOperands(tree); + + emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type)); + + assert(targetReg == REG_NA); + int SaveCcResultReg = (int)REG_RA << 5; + + if (varTypeIsFloating(op1Type)) + { + assert(genTypeSize(op1Type) == genTypeSize(op2Type)); + //int cc = 1; + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + + if (tree->OperIs(GT_EQ)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_NE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_LT)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_LE)) + { + ins = INS_bcnez; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_GE)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + else if (tree->OperIs(GT_GT)) + { + ins = INS_bceqz; + if (cmpSize == EA_4BYTE) + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + else + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + } + + //assert(0 <= cc && cc < 8); + if (IsEq) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1/*cc*/);//5-bits; + else + { + jtrue->gtOp2 = (GenTree*)(1/*cc*/); + jtrue->SetRegNum((regNumber)ins); + } + } + else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) + { + ssize_t imm1 = op1->AsIntCon()->gtIconVal; + ssize_t imm2 = op2->AsIntCon()->gtIconVal; + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + + switch (cmpSize) + { + case EA_4BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + case EA_8BYTE: + break; + case EA_1BYTE: + { + imm1 = static_cast(imm1); + imm2 = static_cast(imm2); + } + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue."); + } + switch (tree->OperGet()) + { + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; + } + + if (IsEq && (ins != INS_invalid)) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0);//5-bits; + else if (ins != INS_invalid) + { + jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; + jtrue->SetRegNum((regNumber)ins); + } + } + else + { + //TODO:can optimize further. 
+ if (op1->isContainedIntOrIImmed()) + { + op1 = tree->gtOp2; + op2 = tree->gtOp1; + switch (tree->OperGet()) + { + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; + } + } + + assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); + + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + + regNumber regOp1 = op1->GetRegNum(); + + if (op2->isContainedIntOrIImmed()) + { + ssize_t imm = op2->AsIntCon()->gtIconVal; + + if (imm) + { + switch (cmpSize) + { + case EA_4BYTE: + if (op1->gtFlags & GTF_UNSIGNED) + imm = static_cast(imm); + else + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + //case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); + } + + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm);//TODO: maybe optimize. + } + else + { + SaveCcResultReg = 0; + } + + if (tree->OperIs(GT_LT)) { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) { + SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) { + SaveCcResultReg |= ((int)regOp1); + ins = IsUnsigned ? 
INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) { + SaveCcResultReg |= ((int)regOp1); + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) { + SaveCcResultReg |= ((int)regOp1); + ins = INS_beq; + } + } + else + { + regNumber regOp2 = op2->GetRegNum(); + if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + {//TODO: should amend further!!! + regNumber tmpRegOp1 = tree->ExtractTempReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + {//TODO: should amend further!!! + regNumber tmpRegOp1 = tree->ExtractTempReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp1 = tmpRegOp1; + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + {//TODO: should amend further!!! + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); + regOp2 = REG_RA; + } + else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) + {//TODO: should amend further!!! + if (!(op1->gtFlags & GTF_UNSIGNED)) + { + regNumber tmpRegOp1 = tree->ExtractTempReg(); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); + regOp1 = tmpRegOp1; + } + if (!(op2->gtFlags & GTF_UNSIGNED)) + { + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); + regOp2 = REG_RA; + } + } + + if (tree->OperIs(GT_LT)) { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? 
INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_LE)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_GT)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = IsUnsigned ? INS_bltu : INS_blt; + } + else if (tree->OperIs(GT_GE)) { + SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); + ins = IsUnsigned ? INS_bgeu : INS_bge; + } + else if (tree->OperIs(GT_NE)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_bne; + } + else if (tree->OperIs(GT_EQ)) { + SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; + ins = INS_beq; + } + } + + if (IsEq) + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg);//5-bits; + else + { + jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; + jtrue->SetRegNum((regNumber)ins); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpCompare: Generates code for jmpCompare statement. +// +// A GT_JCMP node is created when a comparison and conditional branch +// can be executed in a single instruction. +// +// LOONGARCH64 has a few instructions with this behavior. +// - beq/bne -- Compare and branch register equal/not equal +// +// The beq/bne supports the normal +/- 2^15 branch range for conditional branches +// +// A GT_JCMP beq/bne node is created when there is a GT_EQ or GT_NE +// integer/unsigned comparison against the value of Rt register which is used by +// a GT_JTRUE condition jump node. +// +// This node is responsible for consuming the register, and emitting the +// appropriate fused compare/test and branch instruction +// +// Two flags guide code generation +// GTF_JCMP_EQ -- Set if this is beq rather than bne +// +// Arguments: +// tree - The GT_JCMP tree node.
+// +// Return Value: +// None +// +void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) +{ + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + assert(tree->OperIs(GT_JCMP)); + assert(!varTypeIsFloating(tree)); + assert(!op1->isUsedFromMemory()); + assert(!op2->isUsedFromMemory()); + assert(op2->IsCnsIntOrI()); + assert(op2->isContained()); + + genConsumeOperands(tree); + + regNumber reg = op1->GetRegNum(); + emitAttr attr = emitActualTypeSize(op1->TypeGet()); + + //if (tree->gtFlags & GTF_JCMP_TST) + //{ + // assert(!"unimplemented on LOONGARCH yet"); + // //ssize_t compareImm = op2->AsIntCon()->IconValue(); + + // //assert(isPow2(compareImm)); + + // //instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_tbz : INS_tbnz; + // //int imm = genLog2((size_t)compareImm); + + // //GetEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); + //} + //else + { + instruction ins; + int regs; + if (op2->AsIntCon()->gtIconVal) + { + assert(reg != REG_R21); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, op2->AsIntCon()->gtIconVal); + regs = (int)reg << 5; + regs |= (int)REG_R21;//REG_R21 + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; + } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs);//5-bits; + } +} + +//--------------------------------------------------------------------- +// genSPtoFPdelta - return offset from the stack pointer (Initial-SP) to the frame pointer. The frame pointer +// will point to the saved frame pointer slot (i.e., there will be frame pointer chaining). 
+// +int CodeGenInterface::genSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + + int delta; + if (IsSaveFpRaWithAllCalleeSavedRegisters()) + { + //delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; + //assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); + delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - + (compiler->compCalleeRegsPushed -1)* REGSIZE_BYTES; + } + else + { + delta = compiler->lvaOutgoingArgSpaceSize; + } + + assert(delta >= 0); + return delta; +} + +//--------------------------------------------------------------------- +// genTotalFrameSize - return the total size of the stack frame, including local size, +// callee-saved register size, etc. +// +// Return value: +// Total frame size +// + +int CodeGenInterface::genTotalFrameSize() const +{ + // For varargs functions, we home all the incoming register arguments. They are not + // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but + // since we don't use "push" instructions to save them, we don't have to do the + // save of these varargs register arguments as the first thing in the prolog. + + assert(!IsUninitialized(compiler->compCalleeRegsPushed)); + + int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) + + compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + + assert(totalFrameSize > 0); + return totalFrameSize; +} + +//--------------------------------------------------------------------- +// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. +// This number is going to be negative, since the Caller-SP is at a higher +// address than the frame pointer. +// +// There must be a frame pointer to call this function! 
+ +int CodeGenInterface::genCallerSPtoFPdelta() const +{ + assert(isFramePointerUsed()); + int callerSPtoFPdelta; + + callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); + + assert(callerSPtoFPdelta <= 0); + return callerSPtoFPdelta; +} + +//--------------------------------------------------------------------- +// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. +// +// This number will be negative. + +int CodeGenInterface::genCallerSPtoInitialSPdelta() const +{ + int callerSPtoSPdelta = 0; + + callerSPtoSPdelta -= genTotalFrameSize(); + + assert(callerSPtoSPdelta <= 0); + return callerSPtoSPdelta; +} + +//--------------------------------------------------------------------- +// SetSaveFpRaWithAllCalleeSavedRegisters - Set the variable that indicates if FP/RA registers +// are stored with the rest of the callee-saved registers. +void CodeGen::SetSaveFpRaWithAllCalleeSavedRegisters(bool value) +{ + JITDUMP("Setting genSaveFpRaWithAllCalleeSavedRegisters to %s\n", dspBool(value)); + genSaveFpRaWithAllCalleeSavedRegisters = value; +} + +//--------------------------------------------------------------------- +// IsSaveFpRaWithAllCalleeSavedRegisters - Return the value that indicates where FP/RA registers +// are stored in the prolog. +bool CodeGen::IsSaveFpRaWithAllCalleeSavedRegisters() const +{ + return genSaveFpRaWithAllCalleeSavedRegisters; +} + +/***************************************************************************** + * Emit a call to a helper function. + */ + +void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) +{ + void* addr = nullptr; + void* pAddr = nullptr; + + emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN; + addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr); + regNumber callTarget = REG_NA; + + if (addr == nullptr) + { + // This is a call to a runtime helper. + // li x, pAddr #NOTE: this maybe multi-instructions. 
+ // ld x, [x] + // jr x + + if (callTargetReg == REG_NA) + { + // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but + // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET. + callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET; + } + + regMaskTP callTargetMask = genRegMask(callTargetReg); + regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + + // assert that all registers in callTargetMask are in the callKillSet + noway_assert((callTargetMask & callKillSet) == callTargetMask); + + callTarget = callTargetReg; + + //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + } + else + { + //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + } + regSet.verifyRegUsed(callTarget); + + callType = emitter::EC_INDIR_R; + } + + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, + retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); + + regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); + regSet.verifyRegistersUsed(killMask); +} + +#ifdef FEATURE_SIMD + 
+//------------------------------------------------------------------------ +// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main +// routine which in turn calls appropriate genSIMDIntrinsicXXX() routine. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// Currently, we only recognize SIMDVector and SIMDVector, and +// a limited set of methods. +// +// TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. +void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) +{ + assert(!"unimplemented on LOONGARCH yet"); + return INS_OPTS_NONE; +} + +// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic +// +// Arguments: +// intrinsicId - SIMD intrinsic Id +// baseType - Base type of the SIMD vector +// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode +// +// +// Return Value: +// Instruction (op) to be used, and immed is set if instruction requires an immediate operand. +// +instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) +{ + assert(!"unimplemented on LOONGARCH yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------------------------- +// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes +// a number of arguments equal to the length of the Vector. 
+// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//---------------------------------------------------------------------------------- +// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicWiden: Generate code for SIMD Intrinsic Widen operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// The Widen intrinsics are broken into separate intrinsics for the two results. +// +void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicNarrow: Generate code for SIMD Intrinsic Narrow operations +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Notes: +// This intrinsic takes two arguments. The first operand is narrowed to produce the +// lower elements of the results, and the second operand produces the high elements. +// +void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations +// add, sub, mul, bit-wise And, AndNot and Or. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator +// == and != +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//-------------------------------------------------------------------------------- +// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------------------ +// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. 
+// +// Notes: +// The upper half of all SIMD registers are volatile, even the callee-save registers. +// When a 16-byte SIMD value is live across a call, the register allocator will use this intrinsic +// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save +// register. If such a register cannot be found, it will save it to an available caller-save register. +// In that case, this node will be marked GTF_SPILL, which will cause this method to save +// the upper half to the lclVar's home location. +// +void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD16 vector to +// the given register, if any, or to memory. +// +// Arguments: +// simdNode - The GT_SIMD node +// +// Return Value: +// None. +// +// Notes: +// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always +// have their home register, this node has its targetReg on the lclVar child, and its source +// on the simdNode. +// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled +// an upper-half to the lclVar's home location, this node will be marked GTF_SPILLED. +// +void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory. +// Since Vector3 is not a hardware supported write size, it is performed +// as two writes: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store indirect +// +// +// Return Value: +// None. 
+// +void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value. +// Since Vector3 is not a hardware supported write size, it is performed +// as two loads: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node of GT_IND +// +// +// Return Value: +// None. +// +void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//----------------------------------------------------------------------------- +// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported write size, it is performed +// as two stores: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store TYP_SIMD12 field +// +// Return Value: +// None. +// +void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" + +instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType) +{ + assert(!"unimplemented on LOONGARCH yet"); + return INS_invalid; +} + +//------------------------------------------------------------------------ +// genHWIntrinsic: Produce code for a GT_HWINTRINSIC node. +// +// This is the main routine which in turn calls the genHWIntrinsicXXX() routines. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicUnaryOp: +// +// Produce code for a GT_HWINTRINSIC node with form UnaryOp. 
+// +// Consumes one scalar operand produces a scalar +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicCrcOp: +// +// Produce code for a GT_HWINTRINSIC node with form CrcOp. +// +// Consumes two scalar operands and produces a scalar result +// +// This form differs from BinaryOp because the attr depends on the size of op2 +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdBinaryOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdBinaryOp. +// +// Consumes two SIMD operands and produces a SIMD result +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSwitchTable: generate the jump-table for imm-intrinsics +// with non-constant argument +// +// Arguments: +// swReg - register containing the switch case to execute +// tmpReg - temporary integer register for calculating the switch indirect branch target +// swMax - the number of switch cases. +// emitSwCase - lambda to generate an individual switch case +// +// Notes: +// Used for cases where an instruction only supports immediate operands, +// but at jit time the operand is not a constant. +// +// The importer is responsible for inserting an upstream range check +// (GT_HW_INTRINSIC_CHK) for swReg, so no range check is needed here. 
+// +template +void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg, + regNumber tmpReg, + int swMax, + HWIntrinsicSwitchCaseBody emitSwCase) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdExtractOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdExtractOp. +// +// Consumes one SIMD operand and one scalar +// +// The element index operand is typically a const immediate +// When it is not, a switch table is generated +// +// See genHWIntrinsicSwitchTable comments +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdInsertOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdInsertOp. +// +// Consumes one SIMD operand and two scalars +// +// The element index operand is typically a const immediate +// When it is not, a switch table is generated +// +// See genHWIntrinsicSwitchTable comments +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdSelectOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdSelectOp. +// +// Consumes three SIMD operands and produces a SIMD result +// +// This intrinsic form requires one of the source registers to be the +// destination register. Inserts a INS_mov if this requirement is not met. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. 
+// +void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdSetAllOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdSetAllOp. +// +// Consumes single scalar operand and produces a SIMD result +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdUnaryOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdUnaryOp. +// +// Consumes single SIMD operand and produces a SIMD result +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdBinaryRMWOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdBinaryRMWOp. +// +// Consumes two SIMD operands and produces a SIMD result. +// First operand is both source and destination. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicSimdTernaryRMWOp: +// +// Produce code for a GT_HWINTRINSIC node with form SimdTernaryRMWOp +// +// Consumes three SIMD operands and produces a SIMD result. +// First operand is both source and destination. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. 
+// +void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicShaHashOp: +// +// Produce code for a GT_HWINTRINSIC node with form Sha1HashOp. +// Used in LOONGARCH64 SHA1 Hash operations. +// +// Consumes three operands and returns a Simd result. +// First Simd operand is both source and destination. +// Second Operand is an unsigned int. +// Third operand is a simd operand. + +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//------------------------------------------------------------------------ +// genHWIntrinsicShaRotateOp: +// +// Produce code for a GT_HWINTRINSIC node with form Sha1RotateOp. +// Used in LOONGARCH64 SHA1 Rotate operations. +// +// Consumes one integer operand and returns unsigned int result. +// +// Arguments: +// node - the GT_HWINTRINSIC node +// +// Return Value: +// None. +// +void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +#endif // FEATURE_HW_INTRINSICS + +/***************************************************************************** + * Unit testing of the LOONGARCH64 emitter: generate a bunch of instructions into the prolog + * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late + * disassembler thinks the instructions are the same as we do. + */ + +// Uncomment "#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. 
+//#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS + +#if defined(DEBUG) +void CodeGen::genLOONGARCH64EmitterUnitTests() +{ + if (!verbose) + { + return; + } + + if (!compiler->opts.altJit) + { + // No point doing this in a "real" JIT. + return; + } + + // Mark the "fake" instructions in the output. + printf("*************** In genLOONGARCH64EmitterUnitTests()\n"); + + printf("*************** End of genLOONGARCH64EmitterUnitTests()\n"); +} +#endif // defined(DEBUG) + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustment: add a specified constant value to the stack pointer. +// No probe is done. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. +// regTmp - an available temporary register that is used if 'spDelta' cannot be encoded by +// 'sub sp, sp, #spDelta' instruction. +// Can be REG_NA if the caller knows for certain that 'spDelta' fits into the immediate +// value range. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + // We assert that the SP change is less than one page. If it's greater, you should have called a + // function that does a probe, which will in turn call this function. + assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + + if (-2048 <= spDelta && spDelta < 0) + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + else + { + GetEmitter()->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta >> 3); + GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + } +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. 
Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +{ + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + genStackPointerConstantAdjustment(spDelta, regTmp); +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// regTmp - temporary register to use as target for probe load instruction +// +// Return Value: +// Offset in bytes from SP to last probed address. +// +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +{ + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. 
If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, regTmp, REG_SP, 0); + lastTouchDelta = 0; + } + + return lastTouchDelta; +} + +//------------------------------------------------------------------------ +// genCodeForTreeNode Generate code for a single node in the tree. +// +// Preconditions: +// All operands have been evaluated. +// +void CodeGen::genCodeForTreeNode(GenTree* treeNode) +{ + regNumber targetReg = treeNode->GetRegNum(); + var_types targetType = treeNode->TypeGet(); + emitter* emit = GetEmitter(); + +#ifdef DEBUG + // Validate that all the operands for the current node are consumed in order. + // This is important because LSRA ensures that any necessary copies will be + // handled correctly. + lastConsumedNode = nullptr; + if (compiler->verbose) + { + unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio + compiler->gtDispLIRNode(treeNode, "Generating: "); + } +#endif // DEBUG + + // Is this a node whose value is already in a register? LSRA denotes this by + // setting the GTF_REUSE_REG_VAL flag. + if (treeNode->IsReuseRegVal()) + { + // For now, this is only used for constant nodes. 
+ assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL)); + JITDUMP(" TreeNode is marked ReuseReg\n"); + return; + } + + // contained nodes are part of their parents for codegen purposes + // ex : immediates, most LEAs + if (treeNode->isContained()) + { + return; + } + + switch (treeNode->gtOper) + { + case GT_START_NONGC: + GetEmitter()->emitDisableGC(); + break; + + case GT_START_PREEMPTGC: + // Kill callee saves GC registers, and create a label + // so that information gets propagated to the emitter. + gcInfo.gcMarkRegSetNpt(RBM_INT_CALLEE_SAVED); + genDefineTempLabel(genCreateTempLabel()); + break; + + case GT_PROF_HOOK: + // We should be seeing this only if profiler hook is needed + noway_assert(compiler->compIsProfilerHookNeeded()); + +#ifdef PROFILING_SUPPORTED + // Right now this node is used only for tail calls. In future if + // we intend to use it for Enter or Leave hooks, add a data member + // to this node indicating the kind of profiler hook. For example, + // helper number can be used. 
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); +#endif // PROFILING_SUPPORTED + break; + + case GT_LCLHEAP: + genLclHeap(treeNode); + break; + + case GT_CNS_INT: + if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + treeNode->gtOper = GT_CNS_DBL; + FALLTHROUGH; + case GT_CNS_DBL: + genSetRegToConst(targetReg, targetType, treeNode); + genProduceReg(treeNode); + break; + + case GT_NOT: + case GT_NEG: + genCodeForNegNot(treeNode); + break; + + case GT_BSWAP: + case GT_BSWAP16: + genCodeForBswap(treeNode); + break; + + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_UDIV: + genCodeForDivMod(treeNode->AsOp()); + break; + + case GT_OR: + case GT_XOR: + case GT_AND: + assert(varTypeIsIntegralOrI(treeNode)); + + FALLTHROUGH; + + case GT_ADD: + case GT_SUB: + case GT_MUL: + genConsumeOperands(treeNode->AsOp()); + genCodeForBinary(treeNode->AsOp()); + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + genCodeForShift(treeNode); + break; + + case GT_CAST: + genCodeForCast(treeNode->AsOp()); + break; + + case GT_BITCAST: + genCodeForBitCast(treeNode->AsOp()); + break; + + case GT_LCL_FLD_ADDR: + case GT_LCL_VAR_ADDR: + genCodeForLclAddr(treeNode); + break; + + case GT_LCL_FLD: + genCodeForLclFld(treeNode->AsLclFld()); + break; + + case GT_LCL_VAR: + genCodeForLclVar(treeNode->AsLclVar()); + break; + + case GT_STORE_LCL_FLD: + genCodeForStoreLclFld(treeNode->AsLclFld()); + break; + + case GT_STORE_LCL_VAR: + genCodeForStoreLclVar(treeNode->AsLclVar()); + break; + + case GT_RETFILT: + case GT_RETURN: + genReturn(treeNode); + break; + + case GT_LEA: + // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction. 
+ genLeaInstruction(treeNode->AsAddrMode()); + break; + + case GT_INDEX_ADDR: + genCodeForIndexAddr(treeNode->AsIndexAddr()); + break; + + case GT_IND: + genCodeForIndir(treeNode->AsIndir()); + break; + + case GT_INC_SATURATE: + genCodeForIncSaturate(treeNode); + break; + + case GT_MULHI: + genCodeForMulHi(treeNode->AsOp()); + break; + + case GT_SWAP: + genCodeForSwap(treeNode->AsOp()); + break; + + case GT_JMP: + genJmpMethod(treeNode); + break; + + case GT_CKFINITE: + genCkfinite(treeNode); + break; + + case GT_INTRINSIC: + genIntrinsic(treeNode); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + genSIMDIntrinsic(treeNode->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + genHWIntrinsic(treeNode->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_CMP: + if (treeNode->GetRegNum() != REG_NA) + { + genCodeForCompare(treeNode->AsOp()); + } + else if (!treeNode->gtNext) + genCodeForJumpTrue(treeNode->AsOp()); + else if (!treeNode->gtNext->OperIs(GT_JTRUE)) + { + GenTree* treeNode_next = treeNode->gtNext; + while (treeNode_next) + { + if (treeNode_next->OperIs(GT_JTRUE)) + break; + treeNode_next = treeNode_next->gtNext; + }; + assert(treeNode_next->OperIs(GT_JTRUE)); + //genCodeForJumpTrue(treeNode_next->AsOp()); + genCodeForCompare(treeNode_next->AsOp()); + } + break; + + case GT_JTRUE: + genCodeForJumpTrue(treeNode->AsOp()); + break; + + case GT_JCMP: + genCodeForJumpCompare(treeNode->AsOp()); + break; + + case GT_JCC: + genCodeForJcc(treeNode->AsCC()); + break; + + case GT_SETCC: + genCodeForSetcc(treeNode->AsCC()); + break; + + case GT_RETURNTRAP: + genCodeForReturnTrap(treeNode->AsOp()); + break; + + case GT_STOREIND: + genCodeForStoreInd(treeNode->AsStoreInd()); + break; + + case GT_COPY: + // This is handled at the time we call genConsumeReg() on the GT_COPY + break; + + case GT_FIELD_LIST: + // Should always 
be marked contained. + assert(!"LIST, FIELD_LIST nodes should always be marked contained."); + break; + + case GT_PUTARG_STK: + genPutArgStk(treeNode->AsPutArgStk()); + break; + + case GT_PUTARG_REG: + genPutArgReg(treeNode->AsOp()); + break; + +#if FEATURE_ARG_SPLIT + case GT_PUTARG_SPLIT: + genPutArgSplit(treeNode->AsPutArgSplit()); + break; +#endif // FEATURE_ARG_SPLIT + + case GT_CALL: + genCallInstruction(treeNode->AsCall()); + break; + + case GT_MEMORYBARRIER: + { + CodeGen::BarrierKind barrierKind = + treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL; + + instGen_MemoryBarrier(barrierKind); + break; + } + + case GT_XCHG: + case GT_XADD: + genLockedInstructions(treeNode->AsOp()); + break; + + case GT_CMPXCHG: + genCodeForCmpXchg(treeNode->AsCmpXchg()); + break; + + case GT_RELOAD: + // do nothing - reload is just a marker. + // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child + // into the register specified in this node. + break; + + case GT_NOP: + break; + + case GT_KEEPALIVE: + if (treeNode->AsOp()->gtOp1->isContained()) + { + // For this case we simply need to update the lifetime of the local. + genUpdateLife(treeNode->AsOp()->gtOp1); + } + else + { + genConsumeReg(treeNode->AsOp()->gtOp1); + } + break; + + case GT_NO_OP: + instGen(INS_nop); + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + genRangeCheck(treeNode); + break; + + case GT_PHYSREG: + genCodeForPhysReg(treeNode->AsPhysReg()); + break; + + case GT_NULLCHECK: + genCodeForNullCheck(treeNode->AsIndir()); + break; + + case GT_CATCH_ARG: + + noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); + + /* Catch arguments get passed in a register. genCodeForBBlist() + would have marked it as holding a GC object, but not used. 
*/ + + noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); + genConsumeReg(treeNode); + break; + + case GT_PINVOKE_PROLOG: + noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); + + // the runtime side requires the codegen here to be consistent +#ifdef PSEUDORANDOM_NOP_INSERTION + emit->emitDisableRandomNops(); +#endif // PSEUDORANDOM_NOP_INSERTION + break; + + case GT_LABEL: + genPendingCallLabel = genCreateTempLabel(); + emit->emitIns_R_L(INS_ld_d, EA_PTRSIZE, genPendingCallLabel, targetReg); + break; + + case GT_STORE_OBJ: + case GT_STORE_DYN_BLK: + case GT_STORE_BLK: + genCodeForStoreBlk(treeNode->AsBlk()); + break; + + case GT_JMPTABLE: + genJumpTable(treeNode); + break; + + case GT_SWITCH_TABLE: + genTableBasedSwitch(treeNode); + break; + + case GT_ARR_INDEX: + genCodeForArrIndex(treeNode->AsArrIndex()); + break; + + case GT_ARR_OFFSET: + genCodeForArrOffset(treeNode->AsArrOffs()); + break; + + case GT_IL_OFFSET: + // Do nothing; these nodes are simply markers for debug info. + break; + + default: + { +#ifdef DEBUG + char message[256]; + _snprintf_s(message, ArrLen(message), _TRUNCATE, "NYI: Unimplemented node type %s", + GenTree::OpName(treeNode->OperGet())); + NYIRAW(message); +#else + NYI("unimplemented node"); +#endif + } + break; + } +} + +//------------------------------------------------------------------------ +// genSetRegToIcon: Generate code that will set the given register to the integer constant. +// +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. 
+ noway_assert(type != TYP_REF || val == 0); + + GetEmitter()->emitIns_I_la(emitActualTypeSize(type), reg, val); + regSet.verifyRegUsed(reg); +} + +//--------------------------------------------------------------------- +// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. +// +// Arguments: +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// +// Return Value: +// None +// +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + if (!compiler->getNeedsGSSecurityCookie()) + { + return; + } + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + noway_assert(compiler->gsGlobalSecurityCookieVal != 0); + // initReg = #GlobalSecurityCookieVal; [frame.GSSecurityCookie] = initReg + genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + else + { + //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { + //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, 
((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + } + regSet.verifyRegUsed(initReg); + GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); + } + + *pInitRegZeroed = false; +} + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTree* treeNode) +{ + assert(!"unimplemented on LOONGARCH yet"); +} + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for a GT_PUTARG_STK node +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_STK)); + GenTree* source = treeNode->gtOp1; + var_types targetType = genActualType(source->TypeGet()); + emitter* emit = GetEmitter(); + + // This is the varNum for our store operations, + // typically this is the varNum for the Outgoing arg space + // When we are generating a tail call it will be the varNum for arg0 + unsigned varNumOut = (unsigned)-1; + unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks + + // Get argument offset to use with 'varNumOut' + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. + unsigned argOffsetOut = treeNode->getArgOffset(); + +#ifdef DEBUG + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode); + assert(curArgTabEntry != nullptr); + DEBUG_ARG_SLOTS_ASSERT(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); +#endif // DEBUG + + // Whether to setup stk arg in incoming or out-going arg area? + // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. 
+ // All other calls - stk arg is setup in out-going arg area. + if (treeNode->putInIncomingArgArea()) + { + varNumOut = getFirstArgWithStackSlot(); + argOffsetMax = compiler->compArgSize; +#if FEATURE_FASTTAILCALL + // This must be a fast tail call. + assert(treeNode->gtCall->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. + LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]); + assert(varDsc != nullptr); +#endif // FEATURE_FASTTAILCALL + } + else + { + varNumOut = compiler->lvaOutgoingArgSpaceVar; + argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + } + + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); + + if (!isStruct) // a normal non-Struct argument + { + if (varTypeIsSIMD(targetType)) + { + assert(!"unimplemented on LOONGARCH yet"); + } + + instruction storeIns = ins_Store(targetType); + emitAttr storeAttr = emitTypeSize(targetType); + + // If it is contained then source must be the integer constant zero + if (source->isContained()) + { + assert(source->OperGet() == GT_CNS_INT); + assert(source->AsIntConCommon()->IconValue() == 0); + + emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut); + } + else + { + genConsumeReg(source); + if (storeIns == INS_st_w) + { + emit->emitIns_R_R_R(INS_add_w, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0); + storeIns = INS_st_d; + storeAttr = EA_8BYTE; + } + emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut); + } + argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + else // We have some kind of a struct argument + { + assert(source->isContained()); // We expect that this node was marked as contained in Lower + + if (source->OperGet() == GT_FIELD_LIST) + { + 
genPutArgStkFieldList(treeNode, varNumOut); + } + else // We must have a GT_OBJ or a GT_LCL_VAR + { + noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); + + var_types targetType = source->TypeGet(); + noway_assert(varTypeIsStruct(targetType)); + + // Setup loReg from the internal registers that we reserved in lower. + // + regNumber loReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + if (source->OperGet() == GT_LCL_VAR) + { + varNode = source->AsLclVarCommon(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + else // addrNode is used + { + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + } + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + ClassLayout* layout; + + //unsigned gcPtrCount; // The count of GC pointers in the struct + unsigned srcSize; + bool isHfa; + + //gcPtrCount = treeNode->gtNumSlots; + // Setup the srcSize and layout + if (source->OperGet() == GT_LCL_VAR) + { + assert(varNode != nullptr); + LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + assert(varDsc->lvType == 
TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine + // as that is how much stack is allocated for this LclVar + layout = varDsc->GetLayout(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + // If the source is an OBJ node then we need to use the type information + // it provides (size and GC layout) even if the node wraps a lclvar. Due + // to struct reinterpretation (e.g. Unsafe.As) it is possible that + // the OBJ node has a different type than the lclvar. + CORINFO_CLASS_HANDLE objClass = source->AsObj()->GetLayout()->GetClassHandle(); + + srcSize = compiler->info.compCompHnd->getClassSize(objClass); + layout = source->AsObj()->GetLayout(); + } + + unsigned structSize; + + unsigned dstSize = treeNode->GetStackByteSize(); + if (dstSize != srcSize) + { + // We can generate a smaller code if store size is a multiple of TARGET_POINTER_SIZE. + // The dst size can be rounded up to PUTARG_STK size. + // The src size can be rounded up if it reads a local variable slot because the local + // variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE. + // The exception is arm64 apple arguments because they can be passed without padding. + if (varNode != nullptr) + { + // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size. + const LclVarDsc* varDsc = compiler->lvaGetDesc(varNode); + const unsigned varStackSize = varDsc->lvSize(); + if (varStackSize >= srcSize) + { + srcSize = varStackSize; + } + } + } + if (dstSize == srcSize) + { + structSize = dstSize; + } + else + { + // With Unsafe object wwe can have different strange combinations: + // PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes; + // PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local. 
+ structSize = min(dstSize, srcSize); + } + + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; + + while (remainingSize > 0) + { + var_types type; + + if (remainingSize >= TARGET_POINTER_SIZE) + { + type = layout->GetGCPtrType(nextIndex); + } + else // (remainingSize < TARGET_POINTER_SIZE) + { + // the left over size is smaller than a pointer and thus can never be a GC type + assert(!layout->IsGCPtr(nextIndex)); + + if (remainingSize == 1) + { + type = TYP_UBYTE; + } + else if (remainingSize == 2) + { + type = TYP_USHORT; + } + else + { + assert(remainingSize == 4); + type = TYP_UINT; + } + } + const emitAttr attr = emitTypeSize(type); + const unsigned moveSize = genTypeSize(type); + assert(EA_SIZE_IN_BYTES(attr) == moveSize); + + remainingSize -= moveSize; + + instruction loadIns = ins_Load(type); + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset); + } + else + { + assert(loReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(loadIns, attr, loReg, addrReg, structOffset); + } + + // Emit a store instruction to store the register into the outgoing argument area + instruction storeIns = ins_Store(type); + emit->emitIns_S_R(storeIns, attr, loReg, varNumOut, argOffsetOut); + argOffsetOut += moveSize; + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + + structOffset += moveSize; + nextIndex++; + } + } + } +} + +//--------------------------------------------------------------------- +// genPutArgReg - generate code for a GT_PUTARG_REG node +// +// Arguments +// tree - the GT_PUTARG_REG node +// +// Return value: +// None +// +void CodeGen::genPutArgReg(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_PUTARG_REG)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + assert(targetType != TYP_STRUCT); + + GenTree* op1 = tree->gtOp1; + 
genConsumeReg(op1); + + // If child node is not already in the register we need, move it + if (targetReg != op1->GetRegNum()) + { + if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); +#if 1 + else if (emitter::isFloatReg(targetReg)) + GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + else //if (!emitter::isFloatReg(targetReg)) + { + assert(!emitter::isFloatReg(targetReg)); + GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); + } +#endif + } + genProduceReg(tree); +} + +#if FEATURE_ARG_SPLIT +//--------------------------------------------------------------------- +// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node +// +// Arguments +// tree - the GT_PUTARG_SPLIT node +// +// Return value: +// None +// +void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_SPLIT)); + + GenTree* source = treeNode->gtOp1; + emitter* emit = GetEmitter(); + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + + if (source->OperGet() == GT_FIELD_LIST) + { + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + unsigned regIndex = 0; + unsigned firstOnStackOffs = UINT_MAX; + + for (GenTreeFieldList::Use& use : source->AsFieldList()->Uses()) + { + GenTree* nextArgNode = use.GetNode(); + regNumber fieldReg = nextArgNode->GetRegNum(); + genConsumeReg(nextArgNode); + + if (regIndex >= treeNode->gtNumRegs) + { + if (firstOnStackOffs == UINT_MAX) + { + firstOnStackOffs = use.GetOffset(); + } + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + unsigned offset = treeNode->getArgOffset() + use.GetOffset() - firstOnStackOffs; + // We can't write beyond the outgoing arg area + assert(offset + 
EA_SIZE_IN_BYTES(attr) <= argOffsetMax); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, offset); + } + else + { + var_types type = treeNode->GetRegType(regIndex); + regNumber argReg = treeNode->GetRegNumByIdx(regIndex); + + // If child node is not already in the register we need, move it + if (argReg != fieldReg) + { + inst_RV_RV(ins_Copy(type), argReg, fieldReg, type); + } + regIndex++; + } + } + } + else + { + var_types targetType = source->TypeGet(); + assert(source->OperGet() == GT_OBJ); + assert(varTypeIsStruct(targetType)); + + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTree* addrNode = nullptr; + + addrNode = source->AsOp()->gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; + + if (varNode != nullptr) + { + assert(varNode->isContained()); + srcVarNum = varNode->GetLclNum(); + assert(srcVarNum < compiler->lvaCount); + + // handle promote situation + LclVarDsc* varDsc = compiler->lvaTable + srcVarNum; + + // This struct also must live in the stack frame + // And it can't live in a register (SIMD) + 
assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame && !varDsc->lvRegister); + + // We don't split HFA struct + assert(!varDsc->lvIsHfa()); + } + else // addrNode is used + { + assert(addrNode != nullptr); + // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->GetRegNum(); + + // If addrReg equal to baseReg, we use the last target register as alternative baseReg. + // Because the candidate mask for the internal baseReg does not include any of the target register, + // we can ensure that baseReg, addrReg, and the last target register are not all same. + assert(baseReg != addrReg); + + // We don't split HFA struct + assert(!compiler->IsHfa(source->AsObj()->GetLayout()->GetClassHandle())); + } + + ClassLayout* layout = source->AsObj()->GetLayout(); + + // Put on stack first + unsigned nextIndex = treeNode->gtNumRegs; + unsigned structOffset = nextIndex * TARGET_POINTER_SIZE; + int remainingSize = treeNode->GetStackByteSize(); + unsigned argOffsetOut = treeNode->getArgOffset(); + + // remainingSize is always multiple of TARGET_POINTER_SIZE + assert(remainingSize % TARGET_POINTER_SIZE == 0); + while (remainingSize > 0) + { + var_types type = layout->GetGCPtrType(nextIndex); + + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(INS_ld_d, emitTypeSize(type), baseReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(baseReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ld_d, emitTypeSize(type), baseReg, addrReg, structOffset); + } + + // Emit str instruction to store the register into the outgoing argument area + emit->emitIns_S_R(INS_st_d, emitTypeSize(type), baseReg, varNumOut, argOffsetOut); + + argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of 
// ...the struct (continuation of the wrapped comment on the previous line)
        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
        remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
        structOffset += TARGET_POINTER_SIZE;
        nextIndex += 1;
    }

    // We set up the registers in order, so that we assign the last target register `baseReg` is no longer in use,
    // in case we had to reuse the last target register for it.
    structOffset = 0;
    for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
    {
        regNumber targetReg = treeNode->GetRegNumByIdx(idx);
        var_types type      = treeNode->GetRegType(idx);

        if (varNode != nullptr)
        {
            // Load from our varNumImp source
            emit->emitIns_R_S(ins_Load(type), emitTypeSize(type), targetReg, srcVarNum, structOffset);
        }
        else
        {
            // check for case of destroying the addrRegister while we still need it
            if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
            {
                // Copy the address into baseReg (ori rd, rs, 0 is a plain register move)
                // so later iterations can still read through it.
                assert(targetReg != baseReg);
                emit->emitIns_R_R_I(INS_ori, emitActualTypeSize(type), baseReg, addrReg, 0);
                addrReg = baseReg;
            }

            // Load from our address expression source
            emit->emitIns_R_R_I(ins_Load(type), emitTypeSize(type), targetReg, addrReg, structOffset);
        }
        structOffset += TARGET_POINTER_SIZE;
    }
    }
    genProduceReg(treeNode);
}
#endif // FEATURE_ARG_SPLIT

// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
//    treeNode  -  Gentree of GT_STORE_LCL_VAR
//
// Return Value:
//    None
//
// Assumption:
//    The child of store is a multi-reg call node.
//    genProduceReg() on treeNode is made by caller of this routine.
//
void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
{
    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);

    // Structs of size >=9 and <=16 are returned in two return registers on LOONGARCH64 and HFAs.
    assert(varTypeIsStruct(treeNode));

    // Assumption: current implementation requires that a multi-reg
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
    // being promoted.
    unsigned   lclNum = treeNode->AsLclVarCommon()->GetLclNum();
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree*     op1       = treeNode->gtGetOp1();
    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call      = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
    unsigned              regCount     = pRetTypeDesc->GetReturnRegCount();

    if (treeNode->GetRegNum() != REG_NA)
    {
        // Enregistered multi-reg destinations (SIMD) are not implemented for LOONGARCH64 yet.
        assert(!"unimplemented on LOONGARCH yet");
        // Right now the only enregistrable multi-reg return types supported are SIMD types.
        assert(varTypeIsSIMD(treeNode));
        assert(regCount != 0);

        regNumber dst = treeNode->GetRegNum();

        // Treat dst register as a homogenous vector with element size equal to the src size
        // Insert pieces in reverse order
        for (int i = regCount - 1; i >= 0; --i)
        {
            var_types type = pRetTypeDesc->GetReturnRegType(i);
            regNumber reg  = call->GetRegNumByIdx(i);
            if (op1->IsCopyOrReload())
            {
                // GT_COPY/GT_RELOAD will have valid reg for those positions
                // that need to be copied or reloaded.
                regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
                if (reloadReg != REG_NA)
                {
                    reg = reloadReg;
                }
            }

            assert(reg != REG_NA);
            if (varTypeIsFloating(type))
            {
                // If the register piece was passed in a floating point register
                // Use a vector mov element instruction
                // src is not a vector, so it is in the first element reg[0]
                // mov dst[i], reg[0]
                // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further
                // iterations
                // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for
                // a temporary
                GetEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0);
            }
            else
            {
                // If the register piece was passed in an integer register
                // Use a vector mov from general purpose register instruction
                // mov dst[i], reg
                // This effectively moves from `reg` to `dst[i]`
                GetEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i);
            }
        }

        genProduceReg(treeNode);
    }
    else
    {
        // Stack store: spill each return register piece to the local's stack home.
        int       offset = 0;
        var_types type   = pRetTypeDesc->GetReturnRegType(0);
        regNumber reg    = call->GetRegNumByIdx(0);
        if (op1->IsCopyOrReload())
        {
            // GT_COPY/GT_RELOAD will have valid reg for those positions
            // that need to be copied or reloaded.
            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
            if (reloadReg != REG_NA)
            {
                reg = reloadReg;
            }
        }

        assert(reg != REG_NA);
        GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);

        if (1 < regCount)
        {
            // Second piece: its offset is at least the size of the first piece, and is
            // bumped up to the second piece's own size for alignment when that is larger.
            offset = genTypeSize(type);
            type   = pRetTypeDesc->GetReturnRegType(1);
            reg    = call->GetRegNumByIdx(1);
            offset = offset < genTypeSize(type) ? genTypeSize(type) : offset;
            GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
        }

        genUpdateLife(treeNode);
        varDsc->SetRegNum(REG_STK);
    }
}

//------------------------------------------------------------------------
// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
//
void CodeGen::genRangeCheck(GenTree* oper)
{
    noway_assert(oper->OperIsBoundsCheck());
    GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();

    GenTree* arrLen   = bndsChk->GetArrayLength();
    GenTree* arrIndex = bndsChk->GetIndex();
    GenTree* arrRef   = NULL;
    int      lenOffset = 0;

    GenTree*     src1;
    GenTree*     src2;
    regNumber    reg1;
    regNumber    reg2;
    emitJumpKind jmpKind = EJ_jmp;

    genConsumeRegs(arrIndex);
    genConsumeRegs(arrLen);

    emitter*             emit     = GetEmitter();
    GenTreeIntConCommon* intConst = nullptr;
    if (arrIndex->isContainedIntOrIImmed())
    {
        // Index is a contained constant: materialize it into the scratch register R21.
        src1 = arrLen;
        src2 = arrIndex;
        reg1 = REG_R21;
        reg2 = src1->GetRegNum();

        intConst    = src2->AsIntConCommon();
        ssize_t imm = intConst->IconValue();
        if (imm == INT64_MAX)
        {
            // INT64_MAX = (-1 >>u 1); build it in two instructions instead of a long literal load.
            emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1);
            emit->emitIns_R_R_I(INS_srli_d, EA_PTRSIZE, REG_R21, REG_R21, 1);
        }
        else
        {
            emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm);
        }
    }
    else
    {
        src1 = arrIndex;
        src2 = arrLen;
        reg1 = src1->GetRegNum();

        if (src2->isContainedIntOrIImmed())
        {
            // Length is a contained constant: materialize it into R21.
            reg2        = REG_R21;
            ssize_t imm = src2->AsIntConCommon()->IconValue();
            emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm);
        }
        else
        {
            reg2 = src2->GetRegNum();
        }
    }

#ifdef DEBUG
    var_types bndsChkType = genActualType(src2->TypeGet());
    var_types src1ChkType = genActualType(src1->TypeGet());
    // Bounds checks can only be 32 or 64 bit sized comparisons.
    assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
    assert(src1ChkType == TYP_INT || src1ChkType == TYP_LONG);
#endif // DEBUG

    // Throw if (unsigned)index >= (unsigned)length; the unsigned compare also catches negative indices.
    genJumpToThrowHlpBlk_la(bndsChk->gtThrowKind, INS_bgeu, reg1, bndsChk->gtIndRngFailBB, reg2);
}

//---------------------------------------------------------------------
// genCodeForPhysReg - generate code for a GT_PHYSREG node
//
// Arguments
//    tree - the GT_PHYSREG node
//
// Return value:
//    None
//
void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
{
    assert(tree->OperIs(GT_PHYSREG));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->GetRegNum();

    if (targetReg != tree->gtSrcReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
        // The source physical register may hold a GC pointer; transfer that state to the target.
        genTransferRegGCState(targetReg, tree->gtSrcReg);
    }

    genProduceReg(tree);
}

//---------------------------------------------------------------------
// genCodeForNullCheck - generate code for a GT_NULLCHECK node
//
// Arguments
//    tree - the GT_NULLCHECK node
//
// Return value:
//    None
//
void CodeGen::genCodeForNullCheck(GenTreeIndir* tree)
{
    assert(tree->OperIs(GT_NULLCHECK));
    assert(!tree->gtOp1->isContained());
    regNumber addrReg = genConsumeReg(tree->gtOp1);

    // Load into the zero register: the value is discarded, only the fault matters.
    regNumber targetReg = REG_R0;

    GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, targetReg, addrReg, 0);
}

//------------------------------------------------------------------------
// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
//                     producing the effective index by subtracting the lower bound.
//
// Arguments:
//    arrIndex - the node for which we're generating code
//
// Return Value:
//    None.
//
void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
{
    emitter*  emit      = GetEmitter();
    GenTree*  arrObj    = arrIndex->ArrObj();
    GenTree*  indexNode = arrIndex->IndexExpr();
    regNumber arrReg    = genConsumeReg(arrObj);
    regNumber indexReg  = genConsumeReg(indexNode);
    regNumber tgtReg    = arrIndex->GetRegNum();
    noway_assert(tgtReg != REG_NA);

    // We will use a temp register to load the lower bound and dimension size values.
    // R21 serves as that scratch register on LOONGARCH64.

    //regNumber tmpReg = arrIndex->GetSingleTempReg();
    assert(tgtReg != REG_R21);

    unsigned dim  = arrIndex->gtCurrDim;
    unsigned rank = arrIndex->gtArrRank;
    unsigned offset;

    // effectiveIndex = index - lowerBound(dim)
    offset = compiler->eeGetMDArrayLowerBoundOffset(rank, dim);
    emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset);
    emit->emitIns_R_R_R(INS_sub_w, EA_4BYTE, tgtReg, indexReg, REG_R21);

    // Throw if effectiveIndex >= length(dim) (unsigned compare).
    offset = compiler->eeGetMDArrayLengthOffset(rank, dim);
    emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset);
    genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, tgtReg, nullptr, REG_R21);

    genProduceReg(arrIndex);
}

//------------------------------------------------------------------------
// genCodeForArrOffset: Generates code to compute the flattened array offset for
//                      one dimension of an array reference:
//                          result = (prevDimOffset * dimSize) + effectiveIndex
//                      where dimSize is obtained from the arrObj operand
//
// Arguments:
//    arrOffset - the node for which we're generating code
//
// Return Value:
//    None.
//
// Notes:
//    dimSize and effectiveIndex are always non-negative, the former by design,
//    and the latter because it has been normalized to be zero-based.

void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
{
    GenTree*  offsetNode = arrOffset->gtOffset;
    GenTree*  indexNode  = arrOffset->gtIndex;
    regNumber tgtReg     = arrOffset->GetRegNum();

    noway_assert(tgtReg != REG_NA);

    if (!offsetNode->IsIntegralConst(0))
    {
        emitter*  emit      = GetEmitter();
        regNumber offsetReg = genConsumeReg(offsetNode);
        regNumber indexReg  = genConsumeReg(indexNode);
        regNumber arrReg    = genConsumeReg(arrOffset->gtArrObj);
        noway_assert(offsetReg != REG_NA);
        noway_assert(indexReg != REG_NA);
        noway_assert(arrReg != REG_NA);

        //regNumber tmpReg = arrOffset->GetSingleTempReg();

        unsigned dim    = arrOffset->gtCurrDim;
        unsigned rank   = arrOffset->gtArrRank;
        unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim);

        // Load tmpReg with the dimension size and evaluate
        // tgtReg = offsetReg*tmpReg + indexReg.
        emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset);
        emit->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, REG_R21, offsetReg);
        emit->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tgtReg, REG_R21, indexReg);
    }
    else
    {
        // Zero previous-dimension offset: the result is just the effective index.
        regNumber indexReg = genConsumeReg(indexNode);
        if (indexReg != tgtReg)
        {
            // ori rd, rs, 0 is a register-to-register move.
            GetEmitter()->emitIns_R_R_I(INS_ori, emitActualTypeSize(TYP_INT), tgtReg, indexReg, 0);
        }
    }
    genProduceReg(arrOffset);
}

//------------------------------------------------------------------------
// genCodeForShift: Generates the code sequence for a GenTree node that
// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
//
// Arguments:
//    tree - the bit shift node (that specifies the type of bit shift to perform).
//
// Assumptions:
//    a) All GenTrees are register allocated.
+// +void CodeGen::genCodeForShift(GenTree* tree) +{ + //var_types targetType = tree->TypeGet(); + //genTreeOps oper = tree->OperGet(); + instruction ins = genGetInsForOper(tree); + emitAttr size = emitActualTypeSize(tree); + + assert(tree->GetRegNum() != REG_NA); + + genConsumeOperands(tree->AsOp()); + + GenTree* operand = tree->gtGetOp1(); + GenTree* shiftBy = tree->gtGetOp2(); + if (!shiftBy->IsCnsIntOrI()) + { + GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftBy->GetRegNum()); + } + else + { + unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; + + //should check shiftByImm for loongarch32-ins. + unsigned immWidth = emitter::getBitWidth(size); // For LOONGARCH64, immWidth will be set to 32 or 64 + shiftByImm &= (immWidth - 1); + + if (ins == INS_slli_w && shiftByImm >= 32) + { + ins = INS_slli_d; + } + else if (ins == INS_slli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_slli_d; + } + else if (ins == INS_srai_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srai_d; + } + else if (ins == INS_srli_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_srli_d; + } + else if (ins == INS_rotri_d && shiftByImm >= 32 && shiftByImm < 64) + { + ins = INS_rotri_d; + } + + GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm); + } + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR. +// +// Arguments: +// tree - the node. +// +void CodeGen::genCodeForLclAddr(GenTree* tree) +{ + assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + + // Address of a local var. 
+ noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); + + emitAttr size = emitTypeSize(targetType); + + inst_RV_TT(INS_lea, targetReg, tree, 0, size); + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForLclFld: Produce code for a GT_LCL_FLD node. +// +// Arguments: +// tree - the GT_LCL_FLD node +// +void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) +{ + assert(tree->OperIs(GT_LCL_FLD)); + + var_types targetType = tree->TypeGet(); + regNumber targetReg = tree->GetRegNum(); + emitter* emit = GetEmitter(); + + NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported"); + assert(targetReg != REG_NA); + + emitAttr size = emitTypeSize(targetType); + unsigned offs = tree->GetLclOffs(); + unsigned varNum = tree->GetLclNum(); + assert(varNum < compiler->lvaCount); + + emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + + genProduceReg(tree); +} + +//------------------------------------------------------------------------ +// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node. +// +// Arguments: +// tree - the GT_INDEX_ADDR node +// +void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) +{ + GenTree* const base = node->Arr(); + GenTree* const index = node->Index(); + + genConsumeReg(base); + genConsumeReg(index); + + // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the + // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(base->GetRegNum(), base->TypeGet()); + assert(!varTypeIsGC(index->TypeGet())); + + // The index is never contained, even if it is a constant. 
+ assert(index->isUsedFromReg()); + + //const regNumber tmpReg = node->GetSingleTempReg(); + + // Generate the bounds check if necessary. + if ((node->gtFlags & GTF_INX_RNGCHK) != 0) + { + GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, base->GetRegNum(), node->gtLenOffset); + // if (index >= REG_R21) + // { + // JumpToThrowHlpBlk; + // } + // + // sltu AT, index, REG_R21 + // bne AT, zero, RngChkExit + // IndRngFail: + // ... + // RngChkExit: + genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); + } + + emitAttr attr = emitActualTypeSize(node); + // Can we use a ScaledAdd instruction? + // + if (isPow2(node->gtElemSize) && (node->gtElemSize <= 2048)) + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); + + // dest = base + index * scale + genScaledAdd(emitActualTypeSize(node), node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale); + } + else // we have to load the element size and use a MADD (multiply-add) instruction + { + // REG_R21 = element size + CodeGen::genSetRegToIcon(REG_R21, (ssize_t)node->gtElemSize, TYP_INT); + + // dest = index * REG_R21 + base + if (attr == EA_4BYTE) + { + GetEmitter()->emitIns_R_R_R(INS_mul_w, EA_4BYTE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_w, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_mul_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), REG_R21); + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), REG_R21, base->GetRegNum()); + } + } + + // dest = dest + elemOffs + GetEmitter()->emitIns_R_R_I(INS_addi_d, attr, node->GetRegNum(), node->GetRegNum(), node->gtElemOffset); + + gcInfo.gcMarkRegSetNpt(base->gtGetRegMask()); + + genProduceReg(node); +} + +//------------------------------------------------------------------------ +// genCodeForIndir: Produce code for a GT_IND node. 
+// +// Arguments: +// tree - the GT_IND node +// +void CodeGen::genCodeForIndir(GenTreeIndir* tree) +{ + assert(tree->OperIs(GT_IND)); + +#ifdef FEATURE_SIMD + // Handling of Vector3 type values loaded through indirection. + if (tree->TypeGet() == TYP_SIMD12) + { + genLoadIndTypeSIMD12(tree); + return; + } +#endif // FEATURE_SIMD + + var_types type = tree->TypeGet(); + instruction ins = ins_Load(type); + instruction ins2 = INS_none; + regNumber targetReg = tree->GetRegNum(); + regNumber tmpReg = targetReg; + emitAttr attr = emitActualTypeSize(type); + int offset = 0; + + genConsumeAddress(tree->Addr()); + + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { + instGen_MemoryBarrier(BARRIER_FULL); + } + + GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree); + + genProduceReg(tree); +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkHelper - Generate code for a CpBlk node by the means of the VM memcpy helper call +// +// Arguments: +// cpBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode) +{ + // Destination address goes in arg0, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. 
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a full memory barrier before a volatile CpBlk operation + instGen_MemoryBarrier(); + } + + genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); + + if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE) + { + // issue a INS_BARRIER_RMB after a volatile CpBlk operation + instGen_MemoryBarrier(BARRIER_FULL); + } +} + +//---------------------------------------------------------------------------------- +// genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll +// +// Arguments: +// cpBlkNode - Copy block node +// +// Return Value: +// None +// +// Assumption: +// The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes. +// +void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) +{ + assert(cpBlkNode->OperIs(GT_STORE_BLK)); + + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + int dstOffset = 0; + GenTree* dstAddr = cpBlkNode->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + dstOffset = dstAddr->AsLclVarCommon()->GetLclOffs(); + } + + unsigned srcLclNum = BAD_VAR_NUM; + regNumber srcAddrBaseReg = REG_NA; + int srcOffset = 0; + GenTree* src = cpBlkNode->Data(); + + assert(src->isContained()); + + if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + srcLclNum = src->AsLclVarCommon()->GetLclNum(); + srcOffset = src->AsLclVarCommon()->GetLclOffs(); + } + else + { + assert(src->OperIs(GT_IND)); + GenTree* srcAddr = src->AsIndir()->Addr(); + + if (!srcAddr->isContained()) + { + srcAddrBaseReg = genConsumeReg(srcAddr); + } + else if 
(srcAddr->OperIsAddrMode()) + { + srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base()); + srcOffset = srcAddr->AsAddrMode()->Offset(); + } + else + { + assert(srcAddr->OperIsLocalAddr()); + srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); + srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs(); + } + } + + if (cpBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile CpBlk operation + instGen_MemoryBarrier(); + } + + emitter* emit = GetEmitter(); + unsigned size = cpBlkNode->GetLayout()->GetSize(); + + assert(size <= INT32_MAX); + assert(srcOffset < INT32_MAX - static_cast(size)); + assert(dstOffset < INT32_MAX - static_cast(size)); + + regNumber tempReg = cpBlkNode->ExtractTempReg(RBM_ALLINT); + + if (size >= 2 * REGSIZE_BYTES) + { + regNumber tempReg2 = REG_R21;//cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. + + for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; + size -= regSize, srcOffset += regSize, dstOffset += regSize) + { + if (srcLclNum != BAD_VAR_NUM) + { + emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg, srcLclNum, srcOffset); + emit->emitIns_R_S(INS_ld_d, EA_8BYTE, tempReg2, srcLclNum, srcOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg, srcAddrBaseReg, srcOffset); + emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tempReg2, srcAddrBaseReg, srcOffset + 8); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg, dstLclNum, dstOffset); + emit->emitIns_S_R(INS_st_d, EA_8BYTE, tempReg2, dstLclNum, dstOffset + 8); + } + else + { + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg, dstAddrBaseReg, dstOffset); + emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tempReg2, dstAddrBaseReg, dstOffset + 8); + } + } + } + + for (unsigned regSize = REGSIZE_BYTES; size > 0; size -= regSize, srcOffset += regSize, dstOffset += regSize) + { + while (regSize > size) + { + regSize /= 2; + } + + instruction loadIns; + instruction storeIns; + emitAttr attr; + + switch (regSize) + { 
+ case 1: + loadIns = INS_ld_b; + storeIns = INS_st_b; + attr = EA_4BYTE; + break; + case 2: + loadIns = INS_ld_h; + storeIns = INS_st_h; + attr = EA_4BYTE; + break; + case 4: + loadIns = INS_ld_w; + storeIns = INS_st_w; + attr = EA_ATTR(regSize); + break; + case 8: + loadIns = INS_ld_d; + storeIns = INS_st_d; + attr = EA_ATTR(regSize); + break; + default: + unreached(); + } + + if (srcLclNum != BAD_VAR_NUM) + { + emit->emitIns_R_S(loadIns, attr, tempReg, srcLclNum, srcOffset); + } + else + { + emit->emitIns_R_R_I(loadIns, attr, tempReg, srcAddrBaseReg, srcOffset); + } + + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, tempReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, tempReg, dstAddrBaseReg, dstOffset); + } + } + + if (cpBlkNode->IsVolatile()) + { + // issue a load barrier after a volatile CpBlk operation + instGen_MemoryBarrier(BARRIER_LOAD_ONLY); + } +} + +//------------------------------------------------------------------------ +// genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call +// +// Arguments: +// initBlkNode - the GT_STORE_[BLK|OBJ|DYN_BLK] +// +// Preconditions: +// The register assignments have been set appropriately. +// This is validated by genConsumeBlockOp(). +// +void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) +{ + // Size goes in arg2, source address goes in arg1, and size goes in arg2. + // genConsumeBlockOp takes care of this for us. 
    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);

    if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
    {
        // issue a full memory barrier before a volatile initBlock Operation
        instGen_MemoryBarrier();
    }

    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
}

// Generate code for a load from some address + offset
//   base: tree node which can be either a local address or arbitrary node
//   offset: distance from the base from which to load
void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
{
    emitter* emit = GetEmitter();

    if (base->OperIsLocalAddr())
    {
        // For a local-field address, fold the field offset into the load offset.
        if (base->gtOper == GT_LCL_FLD_ADDR)
            offset += base->AsLclFld()->GetLclOffs();
        emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset);
    }
    else
    {
        emit->emitIns_R_R_I(ins, size, dst, base->GetRegNum(), offset);
    }
}

// Generate code for a store to some address + offset
//   base: tree node which can be either a local address or arbitrary node
//   offset: distance from the base at which to store
void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
{
    emitter* emit = GetEmitter();

    if (base->OperIsLocalAddr())
    {
        // For a local-field address, fold the field offset into the store offset.
        if (base->gtOper == GT_LCL_FLD_ADDR)
            offset += base->AsLclFld()->GetLclOffs();
        emit->emitIns_S_R(ins, size, src, base->AsLclVarCommon()->GetLclNum(), offset);
    }
    else
    {
        emit->emitIns_R_R_I(ins, size, src, base->GetRegNum(), offset);
    }
}

//------------------------------------------------------------------------
// genCallInstruction: Produce code for a GT_CALL node
//
void CodeGen::genCallInstruction(GenTreeCall* call)
{
    gtCallTypes callType = (gtCallTypes)call->gtCallType;

    DebugInfo di;

    // all virtuals should have been expanded into a control expression
    assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);

    // Consume all the arg regs
    for (GenTreeCall::Use& use : call->LateArgs())
    {
        GenTree* argNode = use.GetNode();

        fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
        assert(curArgTabEntry);

        // GT_RELOAD/GT_COPY use the child node
        argNode = argNode->gtSkipReloadOrCopy();

        if (curArgTabEntry->GetRegNum() == REG_STK)
            continue;

        // Deal with multi register passed struct args.
        if (argNode->OperGet() == GT_FIELD_LIST)
        {
            regNumber argReg = curArgTabEntry->GetRegNum();
            for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses())
            {
                GenTree* putArgRegNode = use.GetNode();
                assert(putArgRegNode->gtOper == GT_PUTARG_REG);

                genConsumeReg(putArgRegNode);
#if 0
                inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(),
                                /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL));

                argReg = genRegArgNext(argReg);
#endif
            }
        }
#if FEATURE_ARG_SPLIT
        else if (curArgTabEntry->IsSplit())
        {
            assert(curArgTabEntry->numRegs >= 1);
            genConsumeArgSplitStruct(argNode->AsPutArgSplit());
        }
#endif // FEATURE_ARG_SPLIT
        else
        {
            regNumber argReg = curArgTabEntry->GetRegNum();
            genConsumeReg(argNode);
            if (argNode->GetRegNum() != argReg)
            {
                inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->GetRegNum());
            }
        }
    }

    // Insert a null check on "this" pointer if asked.
    if (call->NeedsNullCheck())
    {
        const regNumber regThis = genGetThisArgReg(call);

        // Ditto as genCodeForNullCheck: load to the zero register, only the fault matters.
        GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, regThis, 0);
    }

    // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper
    // method.
    CORINFO_METHOD_HANDLE methHnd;
    GenTree*              target = call->gtControlExpr;
    if (callType == CT_INDIRECT)
    {
        assert(target == nullptr);
        target  = call->gtCallAddr;
        methHnd = nullptr;
    }
    else
    {
        methHnd = call->gtCallMethHnd;
    }

    CORINFO_SIG_INFO* sigInfo = nullptr;
#ifdef DEBUG
    // Pass the call signature information down into the emitter so the emitter can associate
    // native call sites with the signatures they were generated from.
    if (callType != CT_HELPER)
    {
        sigInfo = call->callSig;
    }
#endif // DEBUG

    // If fast tail call, then we are done.  In this case we setup the args (both reg args
    // and stack args in incoming arg area) and call target.  Epilog sequence would
    // generate "br <reg>".
    if (call->IsFastTailCall())
    {
        // Don't support fast tail calling JIT helpers
        assert(callType != CT_HELPER);

        if (target != nullptr)
        {
            // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
            genConsumeReg(target);

            // Use REG_FASTTAILCALL_TARGET on LOONGARCH64 as the call target register.
            if (target->GetRegNum() != REG_FASTTAILCALL_TARGET)
            {
                // ori rd, rs, 0 is a register move.
                GetEmitter()->emitIns_R_R_I(INS_ori, EA_4BYTE, REG_FASTTAILCALL_TARGET, target->GetRegNum(), 0);
            }
        }

        return;
    }

    // For a pinvoke to unmanaged code we emit a label to clear
    // the GC pointer state before the callsite.
    // We can't utilize the typical lazy killing of GC pointers
    // at (or inside) the callsite.
    if (compiler->killGCRefs(call))
    {
        genDefineTempLabel(genCreateTempLabel());
    }

    // Determine return value size(s).
    const ReturnTypeDesc* pRetTypeDesc  = call->GetReturnTypeDesc();
    emitAttr              retSize       = EA_PTRSIZE;
    emitAttr              secondRetSize = EA_UNKNOWN;

    if (call->HasMultiRegRetVal())
    {
        retSize       = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
        secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
    }
    else
    {
        assert(call->gtType != TYP_STRUCT);

        if (call->gtType == TYP_REF)
        {
            retSize = EA_GCREF;
        }
        else if (call->gtType == TYP_BYREF)
        {
            retSize = EA_BYREF;
        }
    }

    // We need to propagate the IL offset information to the call instruction, so we can emit
    // an IL to native mapping record for the call, to support managed return value debugging.
    // We don't want tail call helper calls that were converted from normal calls to get a record,
    // so we skip this hash table lookup logic in that case.
    if (compiler->opts.compDbgInfo && compiler->genCallSite2DebugInfoMap != nullptr && !call->IsTailCall())
    {
        (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di);
    }

    if (target != nullptr)
    {
        // A call target can not be a contained indirection
        assert(!target->isContainedIndir());

        genConsumeReg(target);

        // We have already generated code for gtControlExpr evaluating it into a register.
        // We just need to emit "call reg" in this case.
        //
        assert(genIsValidIntReg(target->GetRegNum()));

        genEmitCall(emitter::EC_INDIR_R, methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
                    retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di,
                    target->GetRegNum(), call->IsFastTailCall());
    }
    else if (call->IsR2ROrVirtualStubRelativeIndir())
    {
        // Generate a direct call to a non-virtual user defined or helper method
        assert(callType == CT_HELPER || callType == CT_USER_FUNC);
#ifdef FEATURE_READYTORUN_COMPILER
        assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) ||
               ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE)));
#endif // FEATURE_READYTORUN_COMPILER
        assert(call->gtControlExpr == nullptr);
        assert(!call->IsTailCall());

        // Load the actual entry point from the indirection cell.
        regNumber tmpReg = call->GetSingleTempReg();
        GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, REG_R2R_INDIRECT_PARAM);

        // We have now generated code for gtControlExpr evaluating it into `tmpReg`.
        // We just need to emit "call tmpReg" in this case.
        //
        assert(genIsValidIntReg(tmpReg));

        genEmitCall(emitter::EC_INDIR_R, methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
                    retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, tmpReg, call->IsFastTailCall());
    }
    else
    {
        // Generate a direct call to a non-virtual user defined or helper method
        assert(callType == CT_HELPER || callType == CT_USER_FUNC);

        void* addr = nullptr;
#ifdef FEATURE_READYTORUN_COMPILER
        if (call->gtEntryPoint.addr != NULL)
        {
            assert(call->gtEntryPoint.accessType == IAT_VALUE);
            addr = call->gtEntryPoint.addr;
        }
        else
#endif // FEATURE_READYTORUN_COMPILER
            if (callType == CT_HELPER)
        {
            CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
            noway_assert(helperNum != CORINFO_HELP_UNDEF);

            void* pAddr = nullptr;
            addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
            assert(pAddr == nullptr);
        }
        else
        {
            // Direct call to a non-virtual user function.
            addr = call->gtDirectCallAddress;
        }

        assert(addr != nullptr);

// Non-virtual direct call to known addresses
        {
            genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr,
                        retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, call->IsFastTailCall());
        }
    }

    // if it was a pinvoke we may have needed to get the address of a label
    if (genPendingCallLabel)
    {
        genDefineInlineTempLabel(genPendingCallLabel);
        genPendingCallLabel = nullptr;
    }

    // Update GC info:
    // All Callee arg registers are trashed and no longer contain any GC pointers.
    // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
    // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
    // registers from RBM_CALLEE_TRASH
    assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
    assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
    gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
    gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;

    var_types returnType = call->TypeGet();
    if (returnType != TYP_VOID)
    {
        regNumber returnReg;

        if (call->HasMultiRegRetVal())
        {
            assert(pRetTypeDesc != nullptr);
            unsigned regCount = pRetTypeDesc->GetReturnRegCount();

            // If regs allocated to call node are different from ABI return
            // regs in which the call has returned its result, move the result
            // to regs allocated to call node.
            for (unsigned i = 0; i < regCount; ++i)
            {
                var_types regType      = pRetTypeDesc->GetReturnRegType(i);
                returnReg              = pRetTypeDesc->GetABIReturnReg(i);
                regNumber allocatedReg = call->GetRegNumByIdx(i);
                if (returnReg != allocatedReg)
                {
                    inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
                }
            }
        }
        else
        {
            if (varTypeUsesFloatArgReg(returnType))
            {
                returnReg = REG_FLOATRET;
            }
            else
            {
                returnReg = REG_INTRET;
            }

            if (call->GetRegNum() != returnReg)
            {
                {
                    inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, returnType);
                }
            }
        }

        genProduceReg(call);
    }

    // If there is nothing next, that means the result is thrown away, so this value is not live.
    // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
    if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
    {
        gcInfo.gcMarkRegSetNpt(RBM_INTRET);
    }
}

// Produce code for a GT_JMP node.
// The arguments of the caller needs to be transferred to the callee before exiting caller.
// The actual jump to callee is generated as part of caller epilog sequence.
+// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
+void CodeGen::genJmpMethod(GenTree* jmp)
+{
+    assert(jmp->OperGet() == GT_JMP);
+    assert(compiler->compJmpOpUsed);
+
+    // If no arguments, nothing to do
+    if (compiler->info.compArgsCount == 0)
+    {
+        return;
+    }
+
+    // Make sure register arguments are in their initial registers
+    // and stack arguments are put back as well.
+    unsigned   varNum;
+    LclVarDsc* varDsc;
+
+    // First move any en-registered stack arguments back to the stack.
+    // At the same time any reg arg not in correct reg is moved back to its stack location.
+    //
+    // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
+    // But that would require us to deal with circularity while moving values around. Spilling
+    // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
+    // are not frequent.
+    for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+    {
+        varDsc = compiler->lvaTable + varNum;
+
+        if (varDsc->lvPromoted)
+        {
+            noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+            unsigned fieldVarNum = varDsc->lvFieldLclStart;
+            varDsc               = compiler->lvaTable + fieldVarNum;
+        }
+        noway_assert(varDsc->lvIsParam);
+
+        if (varDsc->lvIsRegArg && (varDsc->GetRegNum() != REG_STK))
+        {
+            // Skip reg args which are already in its right register for jmp call.
+            // If not, we will spill such args to their stack locations.
+            //
+            // If we need to generate a tail call profiler hook, then spill all
+            // arg regs to free them up for the callback.
+            if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg()))
+                continue;
+        }
+        else if (varDsc->GetRegNum() == REG_STK)
+        {
+            // Skip args which are currently living in stack.
+            continue;
+        }
+
+        // If we came here it means either a reg argument not in the right register or
+        // a stack argument currently living in a register. In either case the following
+        // assert should hold.
+        assert(varDsc->GetRegNum() != REG_STK);
+        assert(varDsc->TypeGet() != TYP_STRUCT);
+        var_types storeType = genActualType(varDsc->TypeGet());
+        emitAttr  storeSize = emitActualTypeSize(storeType);
+
+        GetEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->GetRegNum(), varNum, 0);
+        // Update GetRegNum() life and GC info to indicate GetRegNum() is dead and varDsc stack slot is going live.
+        // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it.
+        // Therefore manually update life of varDsc->GetRegNum().
+        regMaskTP tempMask = genRegMask(varDsc->GetRegNum());
+        regSet.RemoveMaskVars(tempMask);
+        gcInfo.gcMarkRegSetNpt(tempMask);
+        if (compiler->lvaIsGCTracked(varDsc))
+        {
+            VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+        }
+    }
+
+#ifdef PROFILING_SUPPORTED
+    // At this point all arg regs are free.
+    // Emit tail call profiler callback.
+    genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next move any un-enregistered register arguments back to their register.
+    regMaskTP fixedIntArgMask = RBM_NONE;    // tracks the int arg regs occupying fixed args in case of a vararg method.
+    unsigned  firstArgVarNum  = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+    for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+    {
+        varDsc = compiler->lvaTable + varNum;
+        if (varDsc->lvPromoted)
+        {
+            noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+            unsigned fieldVarNum = varDsc->lvFieldLclStart;
+            varDsc               = compiler->lvaTable + fieldVarNum;
+        }
+        noway_assert(varDsc->lvIsParam);
+
+        // Skip if arg not passed in a register.
+        if (!varDsc->lvIsRegArg)
+            continue;
+
+        // Register argument
+        noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+        // Is register argument already in the right register?
+        // If not load it from its stack location.
+        regNumber argReg     = varDsc->GetArgReg(); // incoming arg register
+        regNumber argRegNext = REG_NA;
+
+        if (varDsc->GetRegNum() != argReg)
+        {
+            var_types loadType = TYP_UNDEF;
+
+            // NOTE for LOONGARCH: not supports the HFA.
+            assert(!varDsc->lvIsHfaRegArg());
+            {
+                if (varTypeIsStruct(varDsc))
+                {
+                    // Must be <= 16 bytes or else it wouldn't be passed in registers,
+                    // which can be bigger (and is handled above).
+                    noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= 16);
+                    if (emitter::isFloatReg(argReg))
+                    {
+                        loadType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE;
+                    }
+                    else
+                        loadType = varDsc->GetLayout()->GetGCPtrType(0);
+                }
+                else
+                {
+                    loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
+                }
+
+                emitAttr loadSize = emitActualTypeSize(loadType);
+                GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
+
+                // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+                // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be
+                // expecting it. Therefore manually update life of argReg. Note that GT_JMP marks the end of
+                // the basic block and after which reg life and gc info will be recomputed for the new block
+                // in genCodeForBBList().
+                regSet.AddMaskVars(genRegMask(argReg));
+                gcInfo.gcMarkRegPtrVal(argReg, loadType);
+
+                // if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
+                if (varDsc->GetOtherArgReg() < REG_STK)
+                {
+                    // Restore the second register.
+                    argRegNext = varDsc->GetOtherArgReg();
+
+                    if (emitter::isFloatReg(argRegNext))
+                    {
+                        loadType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE;
+                    }
+                    else
+                        loadType = varDsc->GetLayout()->GetGCPtrType(1);
+
+                    loadSize = emitActualTypeSize(loadType);
+                    // Second field/half lives right after the first slot (4 or 8 bytes in).
+                    int offs = loadSize == EA_4BYTE ? 4 : 8;
+                    GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, offs);
+
+                    regSet.AddMaskVars(genRegMask(argRegNext));
+                    gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
+                }
+
+                if (compiler->lvaIsGCTracked(varDsc))
+                {
+                    VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+                }
+            }
+        }
+
+        if (compiler->info.compIsVarArgs)
+        {
+            assert(!"unimplemented on LOONGARCH yet!");
+            // In case of a jmp call to a vararg method ensure only integer registers are passed.
+            assert((genRegMask(argReg) & (RBM_ARG_REGS)) != RBM_NONE);
+            assert(!varDsc->lvIsHfaRegArg());
+
+            fixedIntArgMask |= genRegMask(argReg);
+
+            if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs))
+            {
+                assert(argRegNext != REG_NA);
+                fixedIntArgMask |= genRegMask(argRegNext);
+            }
+
+            if (argReg == REG_ARG_0)
+            {
+                assert(firstArgVarNum == BAD_VAR_NUM);
+                firstArgVarNum = varNum;
+            }
+        }
+    }
+
+    // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg,
+    // load the remaining integer arg registers from the corresponding
+    // shadow stack slots. This is for the reason that we don't know the number and type
+    // of non-fixed params passed by the caller, therefore we have to assume the worst case
+    // of caller passing all integer arg regs that can be max size of reg.
+    //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args
+    // the callee no way of knowing their gc-ness. Therefore, mark the region that loads
+    // remaining arg registers from shadow stack slots as non-gc interruptible.
+    if (fixedIntArgMask != RBM_NONE)
+    {
+        assert(compiler->info.compIsVarArgs);
+        assert(firstArgVarNum != BAD_VAR_NUM);
+
+        regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+        if (remainingIntArgMask != RBM_NONE)
+        {
+            GetEmitter()->emitDisableGC();
+            for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+            {
+                regNumber argReg     = intArgRegs[argNum];
+                regMaskTP argRegMask = genRegMask(argReg);
+
+                if ((remainingIntArgMask & argRegMask) != 0)
+                {
+                    remainingIntArgMask &= ~argRegMask;
+                    GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, argReg, firstArgVarNum, argOffset);
+                }
+
+                argOffset += REGSIZE_BYTES;
+            }
+            GetEmitter()->emitEnableGC();
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// genIntCastOverflowCheck: Generate overflow checking code for an integer cast.
+//
+// Arguments:
+//    cast - The GT_CAST node
+//    desc - The cast description
+//    reg  - The register containing the value to check
+//
+// Notes:
+//    Uses REG_R21 as a scratch register; jumps to the overflow throw helper
+//    via genJumpToThrowHlpBlk_la when the range check fails.
+//
+void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& desc, regNumber reg)
+{
+    switch (desc.CheckKind())
+    {
+        case GenIntCastDesc::CHECK_POSITIVE:
+        {
+            // Throw if reg < 0 (signed compare against the zero register).
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, REG_R0);
+        }
+        break;
+
+        case GenIntCastDesc::CHECK_UINT_RANGE:
+        {
+            // We need to check if the value is not greater than 0xFFFFFFFF
+            // if the upper 32 bits are zero.
+            ssize_t imm = -1;
+            GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm);
+
+            GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 32);
+            GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21);
+        }
+        break;
+
+        case GenIntCastDesc::CHECK_POSITIVE_INT_RANGE:
+        {
+            // We need to check if the value is not greater than 0x7FFFFFFF
+            // if the upper 33 bits are zero.
+            // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL);
+            ssize_t imm = -1;
+            GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm);
+
+            GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_8BYTE, REG_R21, REG_R21, 31);
+
+            GetEmitter()->emitIns_R_R_R(INS_and, EA_8BYTE, REG_R21, reg, REG_R21);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21);
+        }
+        break;
+
+        case GenIntCastDesc::CHECK_INT_RANGE:
+        {
+            // Throw unless INT32_MIN <= reg <= INT32_MAX.
+            const regNumber tempReg = cast->GetSingleTempReg();
+            assert(tempReg != reg);
+            GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MAX);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, tempReg, nullptr, reg);
+
+            GetEmitter()->emitIns_I_la(EA_8BYTE, tempReg, INT32_MIN);
+            genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, reg, nullptr, tempReg);
+        }
+        break;
+
+        default:
+        {
+            assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE);
+            const int   castMaxValue = desc.CheckSmallIntMax();
+            const int   castMinValue = desc.CheckSmallIntMin();
+            instruction ins;
+
+            if (castMaxValue > 2047)
+            { // should amend. should confirm !?!?
+                assert((castMaxValue == 32767) || (castMaxValue == 65535));
+                GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1);
+                ins = castMinValue == 0 ? INS_bgeu : INS_bge;
+                genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21);
+            }
+            else
+            { // should amend.
+                GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue);
+                ins = castMinValue == 0 ? INS_bltu : INS_blt;
+                genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg);
+            }
+
+            if (castMinValue != 0)
+            {
+                // Set REG_R21 = (reg < castMinValue), then throw if it is nonzero.
+                if ((-2048 <= castMinValue) && (castMinValue < 2048))
+                {
+                    GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue);
+                }
+                else
+                {
+                    GetEmitter()->emitIns_I_la(EA_8BYTE, REG_R21, castMinValue);
+                    GetEmitter()->emitIns_R_R_R(INS_slt, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, REG_R21);
+                }
+                genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21);
+            }
+        }
+        break;
+    }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast, with or without overflow check.
+//
+// Arguments:
+//    cast - The GT_CAST node
+//
+// Assumptions:
+//    The cast node is not a contained node and must have an assigned register.
+//    Neither the source nor target type can be a floating point type.
+//
+// TODO-LOONGARCH64-CQ: Allow castOp to be a contained node without an assigned register.
+//
+void CodeGen::genIntToIntCast(GenTreeCast* cast)
+{
+    genConsumeRegs(cast->gtGetOp1());
+
+    emitter*            emit    = GetEmitter();
+    var_types           dstType = cast->CastToType();
+    var_types           srcType = genActualType(cast->gtGetOp1()->TypeGet());
+    const regNumber     srcReg  = cast->gtGetOp1()->GetRegNum();
+    const regNumber     dstReg  = cast->GetRegNum();
+    const unsigned char pos     = 0;
+    const unsigned char size    = 32;
+
+    assert(genIsValidIntReg(srcReg));
+    assert(genIsValidIntReg(dstReg));
+
+    GenIntCastDesc desc(cast);
+
+    if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE)
+    {
+        genIntCastOverflowCheck(cast, desc, srcReg);
+    }
+
+    // if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE))
+    // {
+    //     if (dstType == TYP_INT)
+    //     {
+    //         // convert t0 int32
+    //         emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0);
+    //     }
+    //     else
+    //     {
+    //         // convert t0 uint32
+    //         emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos);
+    //     }
+    // }
+    // else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg))
+    if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg))
+    {
+        instruction ins;
+
+        switch (desc.ExtendKind())
+        {
+            case GenIntCastDesc::ZERO_EXTEND_SMALL_INT:
+                // bstrpick.d extracts the low 8/16 bits, zero-extending the rest.
+                if (desc.ExtendSrcSize() == 1)
+                {
+                    emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 7, pos);
+                }
+                else
+                {
+                    emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 15, pos);
+                }
+                break;
+            case GenIntCastDesc::SIGN_EXTEND_SMALL_INT:
+                ins = (desc.ExtendSrcSize() == 1) ? INS_ext_w_b : INS_ext_w_h;
+                emit->emitIns_R_R(ins, EA_PTRSIZE, dstReg, srcReg);
+                break;
+#ifdef TARGET_64BIT
+            case GenIntCastDesc::ZERO_EXTEND_INT:
+                emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos);
+                break;
+            case GenIntCastDesc::SIGN_EXTEND_INT:
+                // slli.w with shift 0 sign-extends the low 32 bits.
+                emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0);
+                break;
+#endif
+            default:
+                assert(desc.ExtendKind() == GenIntCastDesc::COPY);
+#if 1
+                if (srcType == TYP_INT)
+                    emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); // should amend.
+                else
+                    emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); // reg-to-reg move via ori 0
+#else
+                emit->emitIns_R_R(INS_mov, EA_PTRSIZE, dstReg, srcReg);
+#endif
+                break;
+        }
+    }
+
+    genProduceReg(cast);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    Cast is a non-overflow conversion.
+//    The treeNode must have an assigned register.
+//    The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTree* treeNode)
+{
+    // float <--> double conversions are always non-overflow ones
+    assert(treeNode->OperGet() == GT_CAST);
+    assert(!treeNode->gtOverflow());
+
+    regNumber targetReg = treeNode->GetRegNum();
+    assert(genIsValidFloatReg(targetReg));
+
+    GenTree* op1 = treeNode->AsOp()->gtOp1;
+    assert(!op1->isContained());                  // Cannot be contained
+    assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg.
+
+    var_types dstType = treeNode->CastToType();
+    var_types srcType = op1->TypeGet();
+    assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+    genConsumeOperands(treeNode->AsOp());
+
+    // treeNode must be a reg
+    assert(!treeNode->isContained());
+
+    if (srcType != dstType)
+    {
+        instruction ins = (srcType == TYP_FLOAT) ? INS_fcvt_d_s  // convert Single to Double
+                                                 : INS_fcvt_s_d; // convert Double to Single
+
+        GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum());
+    }
+    else if (treeNode->GetRegNum() != op1->GetRegNum())
+    {
+        // If double to double cast or float to float cast. Emit a move instruction.
+        instruction ins = (srcType == TYP_FLOAT) ? INS_fmov_s : INS_fmov_d;
+        GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(treeNode), treeNode->GetRegNum(), op1->GetRegNum());
+    }
+
+    genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+//
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+                                      unsigned prologSize,
+                                      unsigned epilogSize DEBUGARG(void* codePtr))
+{
+    IAllocator*    allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
+    GcInfoEncoder* gcInfoEncoder  = new (compiler, CMK_GC)
+        GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+    assert(gcInfoEncoder != nullptr);
+
+    // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+    gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+    // We keep the call count for the second call to gcMakeRegPtrTable() below.
+    unsigned callCnt = 0;
+
+    // First we figure out the encoder ID's for the stack slots and registers.
+    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
+
+    // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+    gcInfoEncoder->FinalizeSlotIds();
+
+    // Now we can actually use those slot ID's to declare live ranges.
+    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
+
+    if (compiler->opts.compDbgEnC)
+    {
+        // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
+        // which is:
+        //  -return address
+        //  -saved off RBP
+        //  -saved 'this' pointer and bool for synchronized methods
+
+        // 4 slots for RBP + return address + RSI + RDI
+        int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+        if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+        {
+            if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+                preservedAreaSize += REGSIZE_BYTES;
+
+            preservedAreaSize += 1; // bool for synchronized methods
+        }
+
+        // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+        // frame
+        gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+    }
+
+    if (compiler->opts.IsReversePInvoke())
+    {
+        unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
+        assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM);
+        const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber);
+        gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset());
+    }
+
+    gcInfoEncoder->Build();
+
+    // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+    // let's save the values anyway for debugging purposes
+    compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+    compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/* TODO for LOONGARCH64: not used for loongarch */
+// clang-format off
+const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
+{
+    //{ },       // NONE
+    //{ },       // 1
+    //{ EJ_lt }, // SLT
+    //{ EJ_le }, // SLE
+    //{ EJ_ge }, // SGE
+    //{ EJ_gt }, // SGT
+    //{ EJ_mi }, // S
+    //{ EJ_pl }, // NS
+
+    //{ EJ_eq }, // EQ
+    //{ EJ_ne }, // NE
+    //{ EJ_lo }, // ULT
+    //{ EJ_ls }, // ULE
+    //{ EJ_hs }, // UGE
+    //{ EJ_hi }, // UGT
+    //{ EJ_hs }, // C
+    //{ EJ_lo }, // NC
+
+    //{ EJ_eq },                // FEQ
+    //{ EJ_gt, GT_AND, EJ_lo }, // FNE
+    //{ EJ_lo },                // FLT
+    //{ EJ_ls },                // FLE
+    //{ EJ_ge },                // FGE
+    //{ EJ_gt },                // FGT
+    //{ EJ_vs },                // O
+    //{ EJ_vc },                // NO
+
+    //{ EJ_eq, GT_OR, EJ_vs },  // FEQU
+    //{ EJ_ne },                // FNEU
+    //{ EJ_lt },                // FLTU
+    //{ EJ_le },                // FLEU
+    //{ EJ_hs },                // FGEU
+    //{ EJ_hi },                // FGTU
+    //{ },                      // P
+    //{ },                      // NP
+};
+// clang-format on
+
+//------------------------------------------------------------------------
+// inst_SETCC: Generate code to set a register to 0 or 1 based on a condition.
+//
+// Arguments:
+//   condition - The condition
+//   type      - The type of the value to be produced
+//   dstReg    - The destination register to be set to 1 or 0
+//
+void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg)
+{
+    /* TODO for LOONGARCH64: should redesign and delete. */
+    assert(!"unimplemented on LOONGARCH yet");
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
+//
+// Arguments:
+//    tree - the node
+//
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
+{
+    assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
+
+    // GT_STORE_OBJ is a copy of a struct with GC pointers; it has a dedicated path.
+    if (blkOp->OperIs(GT_STORE_OBJ))
+    {
+        assert(!blkOp->gtBlkOpGcUnsafe);
+        assert(blkOp->OperIsCopyBlkOp());
+        assert(blkOp->AsObj()->GetLayout()->HasGCPtr());
+        genCodeForCpObj(blkOp->AsObj());
+        return;
+    }
+
+    // For GC-unsafe block ops, suppress GC interruption around the copy/init sequence.
+    if (blkOp->gtBlkOpGcUnsafe)
+    {
+        GetEmitter()->emitDisableGC();
+    }
+    bool isCopyBlk = blkOp->OperIsCopyBlkOp();
+
+    switch (blkOp->gtBlkOpKind)
+    {
+        case GenTreeBlk::BlkOpKindHelper:
+            if (isCopyBlk)
+            {
+                genCodeForCpBlkHelper(blkOp);
+            }
+            else
+            {
+                genCodeForInitBlkHelper(blkOp);
+            }
+            break;
+
+        case GenTreeBlk::BlkOpKindUnroll:
+            if (isCopyBlk)
+            {
+                genCodeForCpBlkUnroll(blkOp);
+            }
+            else
+            {
+                genCodeForInitBlkUnroll(blkOp);
+            }
+            break;
+
+        default:
+            unreached();
+    }
+
+    if (blkOp->gtBlkOpGcUnsafe)
+    {
+        GetEmitter()->emitEnableGC();
+    }
+}
+
+//------------------------------------------------------------------------
+// genScaledAdd: A helper for genLeaInstruction: targetReg = baseReg + (indexReg << scale).
+//
+// Arguments:
+//    attr      - the emit attribute (operand size) for the final add
+//    targetReg - destination register
+//    baseReg   - base address register
+//    indexReg  - index register
+//    scale     - left-shift amount applied to the index; 0 means a plain add
+//
+// Notes:
+//    Uses REG_R21 as a scratch register to hold the shifted index.
+//    TODO-LOONGARCH64-CQ: can amend further.
+//
+void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale)
+{
+    emitter* emit = GetEmitter();
+    if (scale == 0)
+    {
+        // target = base + index
+        emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, indexReg);
+    }
+    else
+    {
+        // target = base + index << scale
+        emit->emitIns_R_R_I(INS_slli_d, attr, REG_R21, indexReg, scale);
+        emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, REG_R21);
+    }
+}
+
+//------------------------------------------------------------------------
+// genLeaInstruction: Produce code for a GT_LEA node.
+//
+// Arguments:
+//    lea - the node
+//
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+    genConsumeOperands(lea);
+    emitter* emit   = GetEmitter();
+    emitAttr size   = emitTypeSize(lea);
+    int      offset = lea->Offset();
+
+    // In LOONGARCH we can only load addresses of the form:
+    //
+    // [Base + index*scale]
+    // [Base + Offset]
+    // [Literal] (PC-Relative)
+    //
+    // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
+    // destReg = baseReg + indexReg * scale;
+    // destReg = destReg + offset;
+    //
+    // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
+    //                      addressing mode instruction. Currently we're 'cheating' by producing one or more
+    //                      instructions to generate the addressing mode so we need to modify lowering to
+    //                      produce LEAs that are a 1:1 relationship to the LOONGARCH64 architecture.
+    if (lea->Base() && lea->Index())
+    {
+        GenTree* memBase = lea->Base();
+        GenTree* index   = lea->Index();
+
+        DWORD scale;
+
+        assert(isPow2(lea->gtScale));
+        BitScanForward(&scale, lea->gtScale);
+
+        assert(scale <= 4);
+
+        if (offset != 0)
+        {
+            regNumber tmpReg = lea->GetSingleTempReg();
+
+            // When generating fully interruptible code we have to use the "large offset" sequence
+            // when calculating a EA_BYREF as we can't report a byref that points outside of the object
+            //
+            bool useLargeOffsetSeq = compiler->GetInterruptible() && (size == EA_BYREF);
+
+            if (!useLargeOffsetSeq && ((-2048 <= offset) && (offset <= 2047)))
+            {
+                // Generate code to set tmpReg = base + index*scale
+                genScaledAdd(size, tmpReg, memBase->GetRegNum(), index->GetRegNum(), scale);
+
+                // Then compute target reg from [tmpReg + offset]
+                emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), tmpReg, offset);
+            }
+            else // large offset sequence
+            {
+                noway_assert(tmpReg != index->GetRegNum());
+                noway_assert(tmpReg != memBase->GetRegNum());
+
+                // First load/store tmpReg with the offset constant
+                //      rTmp = imm
+                GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset);
+
+                // Then add the scaled index register
+                //      rTmp = rTmp + index*scale
+                genScaledAdd(EA_PTRSIZE, tmpReg, tmpReg, index->GetRegNum(), scale);
+
+                // Then compute target reg from [base + tmpReg ]
+                //      rDst = base + rTmp
+                emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg);
+            }
+        }
+        else
+        {
+            // Then compute target reg from [base + index*scale]
+            genScaledAdd(size, lea->GetRegNum(), memBase->GetRegNum(), index->GetRegNum(), scale);
+        }
+    }
+    else if (lea->Base())
+    {
+        GenTree* memBase = lea->Base();
+
+        if ((-2048 <= offset) && (offset <= 2047))
+        {
+            if (offset != 0)
+            {
+                // Then compute target reg from [memBase + offset]
+                emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), memBase->GetRegNum(), offset);
+            }
+            else // offset is zero
+            {
+                if (lea->GetRegNum() != memBase->GetRegNum())
+                {
+                    // Register move via ori with a zero immediate.
+                    emit->emitIns_R_R_I(INS_ori, size, lea->GetRegNum(), memBase->GetRegNum(), 0);
+                }
+            }
+        }
+        else
+        {
+            // We require a tmpReg to hold the offset
+            regNumber tmpReg = lea->GetSingleTempReg();
+
+            // First load tmpReg with the large offset constant
+            GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset);
+
+            // Then compute target reg from [memBase + tmpReg]
+            emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg);
+        }
+    }
+    else if (lea->Index())
+    {
+        // If we encounter a GT_LEA node without a base it means it came out
+        // when attempting to optimize an arbitrary arithmetic expression during lower.
+        // This is currently disabled in LOONGARCH64 since we need to adjust lower to account
+        // for the simpler instructions LOONGARCH64 supports.
+        // TODO-LOONGARCH64-CQ: Fix this and let LEA optimize arithmetic trees too.
+        assert(!"We shouldn't see a baseless address computation during CodeGen for LOONGARCH64");
+    }
+
+    genProduceReg(lea);
+}
+
+//------------------------------------------------------------------------
+// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP.
+//
+// Notes:
+//      On LOONGARCH64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
+//      This is done before anything has been pushed. The previous frame might have a large outgoing argument
+//      space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
+//      not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
+//      there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
+//      page by default, so we need to be more careful. We do an extra probe if we might not have probed
+//      recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
+//      on Windows as well just to be consistent, even though it should not be necessary.
+//
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+    assert(compiler->compGeneratingProlog);
+
+    if (frameSize == 0)
+    {
+        return;
+    }
+
+    const target_size_t pageSize = compiler->eeGetPageSize();
+
+    // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
+    // if the next action on the stack might subtract from SP first, before touching the current SP, then
+    // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
+    // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
+    // but we don't alter SP.
+    target_size_t lastTouchDelta = 0;
+
+    assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+
+    if (frameSize < pageSize)
+    {
+        lastTouchDelta = frameSize;
+    }
+    else if (frameSize < 3 * pageSize)
+    {
+        // We don't need a register for the target of the dummy load
+        // ld_w $0,offset(base) will ignore the addr-exception.
+        regNumber rTemp = REG_R0;
+        lastTouchDelta  = frameSize;
+
+        for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
+        {
+            // Generate:
+            //    lw rTemp, -probeOffset(SP)  // load into initReg
+            GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
+            GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, initReg);
+            regSet.verifyRegUsed(initReg);
+            *pInitRegZeroed = false; // The initReg does not contain zero
+
+            lastTouchDelta -= pageSize;
+        }
+
+        assert(lastTouchDelta == frameSize % pageSize);
+        compiler->unwindPadding();
+    }
+    else
+    {
+        assert(frameSize >= 3 * pageSize);
+
+        // Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
+        // until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
+        // the stack afterward (which means the stack pointer needs to be known).
+        //
+        // LOONGARCH64 needs 2 registers. See VERY_LARGE_FRAME_SIZE_REG_MASK for how these
+        // are reserved.
+
+        regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
+        availMask &= ~maskArgRegsLiveIn;   // Remove all of the incoming argument registers as they are currently live
+        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+        regNumber rOffset = initReg;
+        regNumber rLimit;
+        regMaskTP tempMask;
+
+        // We don't need a register for the target of the dummy load
+        // ld_w $0,offset(base) will ignore the addr-exception.
+        regNumber rTemp = REG_R0;
+
+        // We pick the next lowest register number for rLimit
+        noway_assert(availMask != RBM_NONE);
+        tempMask = genFindLowestBit(availMask);
+        rLimit   = genRegNumFromMask(tempMask);
+        availMask &= ~tempMask;
+
+        // Generate:
+        //
+        //      instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
+        //      instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);
+        //      INS_lu12i_w, REG_R21, pageSize >> 12
+        //
+        // loop:
+        //      ldx_w rTemp, sp, rOffset,
+        //      sub_d rOffset, rOffset, REG_R21
+        //      bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this rOffset.
+
+        noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
+
+        GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, rOffset, -(ssize_t)pageSize >> 12);
+        regSet.verifyRegUsed(rOffset);
+        GetEmitter()->emitIns_I_la(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);
+        regSet.verifyRegUsed(rLimit);
+
+        assert(!(pageSize & 0xfff));
+        GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, pageSize >> 12);
+
+        // There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
+        // `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
+
+        GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
+        GetEmitter()->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, rOffset, rOffset, REG_R21);
+
+        assert(REG_R21 != rLimit);
+        assert(REG_R21 != rOffset);
+        // Branch back two instructions (to the ldx_w above); the offset is in bytes (instructions are 4 bytes).
+        ssize_t imm = -2 << 2;
+        GetEmitter()->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, imm);
+
+        *pInitRegZeroed = false; // The initReg does not contain zero
+
+        compiler->unwindPadding();
+
+        lastTouchDelta = frameSize % pageSize;
+    }
+
+    if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
+    {
+        assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
+        GetEmitter()->emitIns_I_la(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
+        GetEmitter()->emitIns_R_R_R(INS_ldx_w, EA_4BYTE, REG_R0, REG_SPBASE, initReg);
+        compiler->unwindPadding();
+
+        regSet.verifyRegUsed(initReg);
+        *pInitRegZeroed = false; // The initReg does not contain zero
+    }
+}
+
+//------------------------------------------------------------------------
+// genJumpToThrowHlpBlk_la: Generate a conditional branch (instruction 'ins' comparing
+// reg1 against reg2) to the throw-helper block for 'codeKind', or, when throw helper
+// blocks are not used, emit the inverted branch around an inline call to the helper.
+//
+inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2)
+{
+    assert(INS_beq <= ins && ins <= INS_bgeu);
+
+    bool useThrowHlpBlk = compiler->fgUseThrowHelperBlocks();
+
+    emitter* emit = GetEmitter();
+    if (useThrowHlpBlk)
+    {
+        // For code with throw helper blocks, find and use the helper block for
+        // raising the exception. The block may be shared by other trees too.
+
+        BasicBlock* excpRaisingBlock;
+
+        if (failBlk != nullptr)
+        {
+            // We already know which block to jump to. Use that.
+            excpRaisingBlock = failBlk;
+
+#ifdef DEBUG
+            Compiler::AddCodeDsc* add =
+                compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+            assert(excpRaisingBlock == add->acdDstBlk);
+#if !FEATURE_FIXED_OUT_ARGS
+            assert(add->acdStkLvlInit || isFramePointerUsed());
+#endif // !FEATURE_FIXED_OUT_ARGS
+#endif // DEBUG
+        }
+        else
+        {
+            // Find the helper-block which raises the exception.
+            Compiler::AddCodeDsc* add =
+                compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+            PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
+            excpRaisingBlock = add->acdDstBlk;
+#if !FEATURE_FIXED_OUT_ARGS
+            assert(add->acdStkLvlInit || isFramePointerUsed());
+#endif // !FEATURE_FIXED_OUT_ARGS
+        }
+
+        noway_assert(excpRaisingBlock != nullptr);
+
+        // Jump to the exception-throwing block on error.
+        // reg2 is packed into bits [9:5] of the third operand (register numbers are 5-bit fields).
+        emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits;
+    }
+    else
+    {
+        // The code to throw the exception will be generated inline, and
+        // we will jump around it in the normal non-exception case.
+
+        void* pAddr = nullptr;
+        void* addr  = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr);
+        emitter::EmitCallType callType;
+        regNumber             callTarget;
+
+        // Invert the branch condition so the fall-through path executes the helper call.
+        // maybe optimize
+        // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne)));
+        if (ins == INS_blt)
+            ins = INS_bge;
+        else if (ins == INS_bltu)
+            ins = INS_bgeu;
+        else if (ins == INS_bge)
+            ins = INS_blt;
+        else if (ins == INS_bgeu)
+            ins = INS_bltu;
+        else
+            ins = ins == INS_beq ? INS_bne : INS_beq;
+        if (addr == nullptr)
+        {
+            callType   = emitter::EC_INDIR_R;
+            callTarget = REG_DEFAULT_HELPER_CALL_TARGET;
+
+            // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize.
+
+            // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
+            // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0);
+            if (compiler->opts.compReloc)
+            {
+                // Skip over the 3-instruction call sequence when the condition does not hold.
+                ssize_t imm = (2 + 1) << 2; // , 1=jirl.
+                emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm);
+                GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
+            }
+            else
+            {
+                // Skip over the 4-instruction call sequence when the condition does not hold.
+                ssize_t imm = (3 + 1) << 2; // , 1=jirl.
+                emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm);
+
+                // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr);
+                // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, );
+                // Materialize pAddr in callTarget (lu12i.w + lu32i.d), then load the helper address.
+                GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12);
+                GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32);
+                GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget,
+                                            ((ssize_t)pAddr & 0xfff) >> 2);
+            }
+        }
+        else
+        { // INS_OPTS_C
+            callType   = emitter::EC_FUNC_TOKEN;
+            callTarget = REG_NA;
+
+            ssize_t imm = 5 << 2;
+            if (compiler->opts.compReloc)
+                imm = 3 << 2;
+
+            emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm);
+        }
+
+        emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)),
+                           INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur,
+                           gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */
+                           callTarget,                                                   /* ireg */
+                           REG_NA, 0, 0,                                                 /* xreg, xmul, disp */
+                           false                                                         /* isJump */
+                           );
+
+        regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind)));
+        regSet.verifyRegistersUsed(killMask);
+    }
+}
+
+//-----------------------------------------------------------------------------------
+// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
+//
+// Arguments:
+//     barrierKind - kind of barrier to emit (Only supports the Full now!! This depends on the CPU).
+//
+// Notes:
+//     All MemoryBarriers instructions can be removed by DOTNET_JitNoMemoryBarriers=1
+//
+void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
+{
+#ifdef DEBUG
+    if (JitConfig.JitNoMemoryBarriers() == 1)
+    {
+        return;
+    }
+#endif // DEBUG
+
+    // TODO: Use the exact barrier type depending on the CPU.
+ GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); +} + +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) +{ + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + + // Only hook if profiler says it's okay. + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } + + compiler->info.compProfilerCallback = true; + + // Need to save on to the stack level, since the helper call will pop the argument + unsigned saveStackLvl2 = genStackLevel; + + /* Restore the stack level */ + SetStackLevel(saveStackLvl2); +} + +/*----------------------------------------------------------------------------- + * + * Push any callee-saved registers we have used + */ + +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + +#if ETW_EBP_FRAMED + if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE)) + { + noway_assert(!"Used register RBM_FPBASE as a scratch register!"); + } +#endif + + // On LA we push the FP (frame-pointer) here along with all other callee saved registers + if (isFramePointerUsed()) + rsPushRegs |= RBM_FPBASE; + + // + // It may be possible to skip pushing/popping ra for leaf methods. However, such optimization would require + // changes in GC suspension architecture. 
+ // + // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we + // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf + // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends + // on the return address to be saved on the stack. If we skipped pushing/popping ra, the return address would never + // be saved on the stack and the GC suspension would time out. + // + // So if we wanted to skip pushing/popping ra for leaf frames, we would also need to do one of + // the following to make GC suspension work in the above scenario: + // - Make return address hijacking work even when ra is not saved on the stack. + // - Generate fully interruptible code for loops that contains calls + // - Generate fully interruptible code for leaf methods + // + // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity + // is not worth it. + // + + rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). + regSet.rsMaskCalleeSaved = rsPushRegs; + regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; + regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; + +#ifdef DEBUG + if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs)) + { + printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ", + compiler->compCalleeRegsPushed, genCountBits(rsPushRegs)); + dspRegMask(rsPushRegs); + printf("\n"); + assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs)); + } +#endif // DEBUG + + // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or requirements and + // options. Case numbers in comments here refer to this document. 
See also Compiler::lvaAssignFrameOffsets() + // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture) + // for pictures of the funclet frame layouts. + // + // For most frames, generate, e.g.: + // sdc1 f31, off+7*8(sp) + // ... + // sdc1 f24, off(sp) + // + // sd s7, off2+7*8(sp) + // ... + // sd s1, off2+8(sp) + // sd s0, off2(sp) + // + // sd fp, 0(sp) + // sd ra, 8(sp) + // + // Notes: + // 1. FP is always saved, and the first store is FP, RA. + // 2. General-purpose registers are 8 bytes, floating-point registers are 8 bytes, but SIMD/FP registers 16 bytes. + // TODO: supporting SIMD feature ! + // 3. For frames with varargs, not implemented completely and not tested ! + // 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). + // + // For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top + // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same + // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. + // + // The frames look like the following (simplified to only include components that matter for establishing the + // frames). See also Compiler::lvaAssignFrameOffsets(). + // + // + // Frames with FP, RA saved at bottom of frame (above outgoing argument space): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. 
+ // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + // + // Frames with FP, RA saved at top of frame (note: above all callee-saved regs): + // + // | | + // |-----------------------| + // | incoming arguments | + // +=======================+ <---- Caller's SP + // | Arguments Or | // if needed. + // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // |-----------------------| + // | Saved RA | // 8 bytes + // |-----------------------| + // | Saved FP | // 8 bytes + // |-----------------------| + // |Callee saved registers | // not including FP/RA; multiple of 8 bytes + // |-----------------------| + // | PSP slot | // 8 bytes (omitted in CoreRT ABI) + // |-----------------------| + // | locals, temps, etc. | + // |-----------------------| + // | possible GS cookie | + // |-----------------------| + // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0) + // |-----------------------| <---- Ambient SP + // | | | + // ~ | Stack grows ~ + // | | downward | + // V + // + + int totalFrameSize = genTotalFrameSize(); + + int offset; // This will be the starting place for saving the callee-saved registers, in increasing order. 
+ +#ifdef DEBUG + if (verbose) + { + printf("Save float regs: "); + dspRegMask(maskSaveRegsFloat); + printf("\n"); + printf("Save int regs: "); + dspRegMask(maskSaveRegsInt); + printf("\n"); + } +#endif // DEBUG + + // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we + // generate based on various sizes. + int frameType = 0; + + // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the + // first save instruction as a "predecrement" amount, if possible. + int calleeSaveSPDelta = 0; + + // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.) + bool establishFramePointer = true; + + // If we do establish the frame pointer, what is the amount we add to SP to do so? + unsigned offsetSpToSavedFp = 0; + + if (isFramePointerUsed()) + { + // We need to save both FP and RA. + + assert((maskSaveRegsInt & RBM_FP) != 0); + assert((maskSaveRegsInt & RBM_RA) != 0); + + // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address + // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses, + // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will + // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our + // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above + // the GS cookie. + // + // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to + // create a frame pointer chain. + // + + if (totalFrameSize < 2048) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + // Case #1. 
+ // + // Generate: + // daddiu sp, sp, -framesz + // sd fp, outsz(sp) + // sd ra, outsz+8(sp) + // + // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. + // + // After saving callee-saved registers, we establish the frame pointer with: + // daddiu fp, sp, offset-fp + // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. + + JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize); + + frameType = 1; + + offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; + +//#ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offsetSpToSavedFp & 0xf) && (offsetSpToSavedFp <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offsetSpToSavedFp >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offsetSpToSavedFp); +// } +// else +//#endif +// { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); + compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); + compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); +// } + + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + + offset = compiler->compLclFrameSize + 2*REGSIZE_BYTES;//FP/RA + } + else + { + frameType = 2; + + offsetSpToSavedFp = genSPtoFPdelta(); + + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize, offsetSpToSavedFp); + + offset = compiler->compLclFrameSize; + } + } + else + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 3. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize); + + frameType = 3; + + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + } + else + { + frameType = 4; + + JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), + totalFrameSize, compiler->compLclFrameSize, calleeSaveSPDelta); + + offset = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + offsetSpToSavedFp = offset + REGSIZE_BYTES; + } + } + } + else + { + // No frame pointer (no chaining). + assert((maskSaveRegsInt & RBM_FP) == 0); + assert((maskSaveRegsInt & RBM_RA) != 0); + + // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using + // 'sd' if we only have one callee-saved register plus RA to save. + + NYI("Frame without frame pointer"); + offset = 0; + } + + assert(frameType != 0); + + JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); + + // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + + //if (compiler->info.compIsVarArgs) + //{ + // JITDUMP(" compIsVarArgs=true\n"); + + // // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. 
+ // assert((offset % 16) == 0); + // for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) + // { + // regNumber reg2 = REG_NEXT(reg1); + // // sd REG, offset(SP) + // // sd REG + 1, (offset+8)(SP) + // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); + // compiler->unwindNop(); + // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); + // compiler->unwindNop(); + // offset += 2 * REGSIZE_BYTES; + // } + //} + +#ifdef DEBUG + if (compiler->opts.disAsm) + printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); +#endif + if (frameType == 1) + { + //offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 2) + { + //offsetSpToSavedFp = genSPtoFPdelta(); + } + else if (frameType == 3) + { + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; + + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offsetSpToSavedFp = offset; + +//#ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offset & 0xf) && (offset <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); +// } +// else +//#endif +// { + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); +// } + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + + calleeSaveSPDelta = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + else + { + calleeSaveSPDelta = totalFrameSize - 
calleeSaveSPDelta; + genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); + + offset = compiler->lvaOutgoingArgSpaceSize; + +//#ifdef OPTIMIZE_LOONGSON_EXT +// if (!(offset & 0xf) && (offset <= 0xff0)) +// { +// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); +// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); +// } +// else +//#endif + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); + compiler->unwindSaveReg(REG_FP, offset); + + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); + compiler->unwindSaveReg(REG_RA, offset + 8); + + genEstablishFramePointer(offset, /* reportUnwindData */ true); + } + + establishFramePointer = false; + } + else if (frameType == 4) + { + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + + establishFramePointer = false; + + int remainingFrameSz = totalFrameSize - calleeSaveSPDelta; + + if (remainingFrameSz > 0) + { + genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true); + } + } + else + { + unreached(); + } + + if (establishFramePointer) + { + JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp); + genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); + } +} + +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// set to non-zero value after this call. 
+// +// Return Value: +// None +// +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) +{ + assert(compiler->compGeneratingProlog); + + // Give profiler a chance to back out of hooking this method + if (!compiler->compIsProfilerHookNeeded()) + { + return; + } +} +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3e9059eb156af..3fe0f14f9b750 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -538,12 +538,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_SHORT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 3: useType = TYP_INT; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #ifdef TARGET_64BIT case 4: @@ -551,14 +551,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS useType = TYP_INT; break; -#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) +#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 5: case 6: case 7: useType = TYP_I_IMPL; break; -#endif // !TARGET_XARCH || UNIX_AMD64_ABI +#endif // !TARGET_XARCH || UNIX_AMD64_ABI || TARGET_LOONGARCH64 #endif // TARGET_64BIT case TARGET_POINTER_SIZE: @@ -757,6 +757,27 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; +#elif defined(TARGET_LOONGARCH64) + // Structs that are pointer sized or smaller. 
+ //assert(structSize > TARGET_POINTER_SIZE); + + // On LOONGARCH64 structs that are 1-16 bytes are passed by value in one/multiple register(s) + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + // setup wbPassType and useType indicate that this is passed by value in multiple registers + // (when all of the parameters registers are used, then the stack will be used) + howToPassStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + else // a structSize that is 17-32 bytes in size + { + // Otherwise we pass this struct by reference to a copy + // setup wbPassType and useType indicate that this is passed using one register + // (by reference to a copy) + howToPassStruct = SPK_ByReference; + useType = TYP_UNKNOWN; + } + #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -777,7 +798,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -911,6 +932,24 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } +#ifdef TARGET_LOONGARCH64 + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(clsHnd); + + if (numFloatFields & 0x1) + { + howToReturnStruct = SPK_PrimitiveType; + useType = structSize > 4 ? TYP_DOUBLE : TYP_FLOAT; + } + else if (numFloatFields & 0xE) + { + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + } +#endif //TARGET_LOONGARCH64 + // Check for cases where a small struct is returned in a register // via a primitive type. 
// @@ -1044,6 +1083,24 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; +#elif defined(TARGET_LOONGARCH64) + + // On LOONGARCH64 structs that are 1-16 bytes are returned by value in one/multiple register(s) + if (structSize <= (TARGET_POINTER_SIZE * 2)) + { + // setup wbPassType and useType indicate that this is return by value in multiple registers + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; + } + else // a structSize that is 17-32 bytes in size + { + // Otherwise we return this struct using a return buffer/byreference. + // setup wbPassType and useType indicate that this is returned using a return buffer register + // (reference to a return buffer) + howToReturnStruct = SPK_ByReference; + useType = TYP_UNKNOWN; + } + #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -2222,6 +2279,8 @@ void Compiler::compSetProcessor() info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; +#elif defined(TARGET_LOONGARCH64) + info.genCPU = CPU_LOONGARCH64; #endif // @@ -2404,6 +2463,10 @@ void Compiler::compSetProcessor() } #endif +#if defined(TARGET_LOONGARCH64) + //TODO: should add LOONGARCH64's features for LOONGARCH64. +#endif + instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags); opts.setSupportedISAs(instructionSetFlags); @@ -2588,6 +2651,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // For non-adaptive, padding limit is same as specified by the alignment. opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary; } +#elif defined(TARGET_LOONGARCH64) + //TODO: should be adaptive on LoongArch64. 
#endif assert(isPow2(opts.compJitAlignLoopBoundary)); @@ -2934,6 +2999,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = 0; #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + // 0 is default: use the appropriate frame type based on the function. + opts.compJitSaveFpRaWithCalleeSavedRegisters = 0; +#endif // defined(TARGET_LOONGARCH64) + #ifdef DEBUG opts.dspInstrs = false; opts.dspLines = false; @@ -3432,6 +3502,13 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters(); } #endif // defined(DEBUG) && defined(TARGET_ARM64) + +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash())) + { + opts.compJitSaveFpRaWithCalleeSavedRegisters = JitConfig.JitSaveFpRaWithCalleeSavedRegisters(); + } +#endif // defined(DEBUG) && defined(TARGET_LOONGARCH64) } #ifdef DEBUG @@ -4030,7 +4107,7 @@ void Compiler::compSetOptimizationLevel() fgCanRelocateEHRegions = true; } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Function compRsvdRegCheck: // given a curState to use for calculating the total frame size // it will return true if the REG_OPT_RSVD should be reserved so @@ -4075,6 +4152,10 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) JITDUMP(" Returning true (ARM64)\n\n"); return true; // just always assume we'll need it, for now +#elif defined(TARGET_LOONGARCH64) + JITDUMP(" Returning true (LOONGARCH64)\n\n"); + return true; // just always assume we'll need it, for now + #else // TARGET_ARM // frame layout: @@ -4198,7 +4279,7 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) return false; #endif // TARGET_ARM } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 
//------------------------------------------------------------------------ // compGetTieringName: get a string describing tiered compilation settings diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ea088cccdd529..9541b47114f30 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -475,7 +475,13 @@ class LclVarDsc #if defined(TARGET_AMD64) || defined(TARGET_ARM64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH) + #if OPT_BOOL_OPS unsigned char lvIsBoolean : 1; // set if variable is boolean @@ -674,6 +680,9 @@ class LclVarDsc { assert(lvIsHfa()); assert(varTypeIsStruct(lvType)); +#if defined(TARGET_LOONGARCH64) + assert(!"lvHfaSlots called not support on LOONGARCH64!"); +#endif unsigned slots = 0; #ifdef TARGET_ARM slots = lvExactSize / sizeof(float); @@ -990,7 +999,7 @@ class LclVarDsc } #endif assert(m_layout != nullptr); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(TypeGet()) || (lvIsImplicitByRef && (TypeGet() == TYP_BYREF))); #else assert(varTypeIsStruct(TypeGet())); @@ -1614,7 +1623,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. 
emitLocation* coldEndLoc; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) UnwindInfo uwi; // Unwind information for this function/funclet's hot section UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section @@ -1629,7 +1638,7 @@ struct FuncInfoDsc emitLocation* coldStartLoc; // locations for the cold section, if there is one. emitLocation* coldEndLoc; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(FEATURE_CFI_SUPPORT) jitstd::vector* cfiCodes; @@ -2148,7 +2157,7 @@ struct fgArgTabEntry // register numbers. void SetMultiRegNums() { -#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#if FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) if (numRegs == 1) { return; @@ -2169,7 +2178,7 @@ struct fgArgTabEntry argReg = (regNumber)(argReg + regSize); setRegNum(regIndex, argReg); } -#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) +#endif // FEATURE_MULTIREG_ARGS && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) } #ifdef DEBUG @@ -2287,6 +2296,20 @@ class fgArgInfo const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); #endif // UNIX_AMD64_ABI +#if defined(TARGET_LOONGARCH64) + fgArgTabEntry* AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber nextOtherRegNum); +#endif + fgArgTabEntry* AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -4037,7 +4060,7 @@ class Compiler // For ARM64, this is structs larger than 16 bytes that are passed by reference. 
bool lvaIsImplicitByRefLocal(unsigned varNum) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LclVarDsc* varDsc = lvaGetDesc(varNum); if (varDsc->lvIsImplicitByRef) { @@ -4046,7 +4069,7 @@ class Compiler assert(varTypeIsStruct(varDsc) || (varDsc->lvType == TYP_BYREF)); return true; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return false; } @@ -8019,9 +8042,14 @@ class Compiler // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); } -#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + static bool varTypeNeedsPartialCalleeSave(var_types type) + {//TODO: supporting SIMD feature for LoongArch64. + return false; + } +#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) #error("Unknown target architecture for FEATURE_SIMD") -#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) +#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE protected: @@ -8194,6 +8222,9 @@ class Compiler #elif defined(TARGET_ARM64) reg = REG_R11; regMask = RBM_R11; +#elif defined(TARGET_LOONGARCH64) + reg = REG_T8; + regMask = RBM_T8; #else #error Unsupported or unset target architecture #endif @@ -8612,6 +8643,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void unwindReturn(regNumber reg); // ret lr #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + void unwindNop(); + void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last + // instruction and the current location. 
+ void unwindSaveReg(regNumber reg, int offset); + void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); + void unwindReturn(regNumber reg); +#endif // defined(TARGET_LOONGARCH64) + // // Private "helper" functions for the unwind implementation. // @@ -8697,9 +8737,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; +#elif defined(TARGET_LOONGARCH64) + //TODO: supporting SIMD feature for LoongArch64. + assert(!"unimplemented yet on LA"); + CORINFO_InstructionSet minimumIsa = 0; #else #error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 +#endif // !TARGET_XARCH && !TARGET_ARM64 && !TARGET_LOONGARCH64 return compOpportunisticallyDependsOn(minimumIsa) && JitConfig.EnableHWIntrinsic(); #else @@ -9824,6 +9868,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int compJitSaveFpLrWithCalleeSavedRegisters; #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) + // Decision about whether to save FP/RA registers with callee-saved registers (see + // COMPlus_JitSaveFpRaWithCalleSavedRegisters). + // TODO: will delete this in future. 
+ int compJitSaveFpRaWithCalleeSavedRegisters; +#endif // defined(TARGET_LOONGARCH64) + #ifdef CONFIGURABLE_ARM_ABI bool compUseSoftFP = false; #else @@ -10132,6 +10183,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #define CPU_ARM 0x0300 // The generic ARM CPU #define CPU_ARM64 0x0400 // The generic ARM64 CPU +#define CPU_LOONGARCH64 0x0800 // The generic LOONGARCH64 CPU + unsigned genCPU; // What CPU are we running on // Number of class profile probes in this method @@ -10654,7 +10707,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void compSetProcessor(); void compInitDebuggingInfo(); void compSetOptimizationLevel(); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool compRsvdRegCheck(FrameLayoutState curState); #endif void compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFlags* compileFlags); @@ -12087,6 +12140,13 @@ const instruction INS_SQRT = INS_fsqrt; #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 +const instruction INS_BREAKPOINT = INS_break; +const instruction INS_MULADD = INS_fmadd_d;// NOTE: default is double. +const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. +const instruction INS_SQRT = INS_fsqrt_d;// NOTE: default is double. 
+#endif // TARGET_LOONGARCH64 + /*****************************************************************************/ extern const BYTE genTypeSizes[]; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 8fad38af36f84..e78270ea4b523 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -602,7 +602,7 @@ inline bool isRegParamType(var_types type) #endif // !TARGET_X86 } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) /*****************************************************************************/ // Returns true if 'type' is a struct that can be enregistered for call args // or can be returned by value in multiple registers. @@ -660,7 +660,7 @@ inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types typ return result; } -#endif // TARGET_AMD64 || TARGET_ARM64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 /*****************************************************************************/ @@ -1104,14 +1104,14 @@ inline GenTree* Compiler::gtNewFieldRef(var_types typ, CORINFO_FIELD_HANDLE fldH { unsigned lclNum = obj->AsOp()->gtOp1->AsLclVarCommon()->GetLclNum(); lvaTable[lclNum].lvFieldAccessed = 1; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // These structs are passed by reference; we should probably be able to treat these // as non-global refs, but downstream logic expects these to be marked this way. 
if (lvaTable[lclNum].lvIsParam) { tree->gtFlags |= GTF_GLOB_REF; } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } else { @@ -1844,7 +1844,7 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState bool doubleWeight = lvIsTemp; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) // and, for the time being, implicit byref params doubleWeight |= lvIsImplicitByRef; #endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -3074,6 +3074,8 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) #ifdef TARGET_ARM return regNum - REG_F0; +#elif defined(TARGET_LOONGARCH64) + return regNum - REG_F0; #elif defined(TARGET_ARM64) return regNum - REG_V0; #elif defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index ebf1ea2945195..b53608757f384 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -453,6 +453,14 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* } } } +#elif defined(TARGET_LOONGARCH64) + // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference + if (structSize > MAX_PASS_MULTIREG_BYTES) + { + // This struct is passed by reference using a single 'slot' + return TARGET_POINTER_SIZE; + } + // otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) NYI("unknown target"); #endif // defined(TARGET_XXX) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index ba4ab8f7b6caa..86150d141b9a5 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -744,8 +744,11 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; // We'll allocate extra space (DWORD aligned) to record the GC regs - +#if defined(TARGET_LOONGARCH64) + gs += 
sizeof(regMaskTP); +#else gs += sizeof(int); +#endif } // Allocate space for the instructions and optional liveset @@ -758,7 +761,11 @@ insGroup* emitter::emitSavIG(bool emitAdd) { // Record the byref regs in front the of the instructions +#if defined(TARGET_LOONGARCH64) + *castto(id, regMaskTP*)++ = emitInitByrefRegs; +#else *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs; +#endif } // Do we need to store the liveset? @@ -790,10 +797,18 @@ insGroup* emitter::emitSavIG(bool emitAdd) // Record how many instructions and bytes of code this group contains +#ifdef TARGET_LOONGARCH64 + noway_assert((unsigned int)emitCurIGinsCnt == emitCurIGinsCnt); +#else noway_assert((BYTE)emitCurIGinsCnt == emitCurIGinsCnt); +#endif noway_assert((unsigned short)emitCurIGsize == emitCurIGsize); +#ifdef TARGET_LOONGARCH64 + ig->igInsCnt = (unsigned int)emitCurIGinsCnt; +#else ig->igInsCnt = (BYTE)emitCurIGinsCnt; +#endif ig->igSize = (unsigned short)emitCurIGsize; emitCurCodeOffset += emitCurIGsize; assert(IsCodeAligned(emitCurCodeOffset)); @@ -1118,6 +1133,10 @@ void emitter::emitBegFN(bool hasFramePtr emitFirstColdIG = nullptr; emitTotalCodeSize = 0; +#ifdef TARGET_LOONGARCH64 + emitCounts_INS_OPTS_J = 0; +#endif + #if EMITTER_STATS emitTotalIGmcnt++; emitSizeMethod = 0; @@ -1159,6 +1178,11 @@ void emitter::emitBegFN(bool hasFramePtr ig->igNext = nullptr; +//#ifdef TARGET_LOONGARCH64 +// On future maybe use this. 
+// ig->igJmpCnt = 0; +//#endif + #ifdef DEBUG emitScratchSigInfo = nullptr; #endif // DEBUG @@ -1296,6 +1320,12 @@ weight_t emitter::getCurrentBlockWeight() } } +#if defined(TARGET_LOONGARCH64) +void emitter::dispIns(instrDesc* id) +{ + assert(!"Not used on LOONGARCH64."); +} +#else void emitter::dispIns(instrDesc* id) { #ifdef DEBUG @@ -1317,6 +1347,7 @@ void emitter::dispIns(instrDesc* id) emitIFcounts[id->idInsFmt()]++; #endif } +#endif void emitter::appendToCurIG(instrDesc* id) { @@ -2302,6 +2333,11 @@ void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi) #ifdef TARGET_AMD64 // doesn't have to be all negative on amd printf("-%04X ... %04X\n", -offsLo, offsHi); +#elif defined(TARGET_LOONGARCH64) + if (offsHi < 0) + printf("-%04X ... -%04X\n", -offsLo, -offsHi); + else + printf("-%04X ... %04X\n", -offsLo, offsHi); #else printf("-%04X ... -%04X\n", -offsLo, -offsHi); assert(offsHi <= 0); @@ -2633,7 +2669,7 @@ const char* emitter::emitLabelString(insGroup* ig) #endif // DEBUG -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Does the argument location point to an IG at the end of a function or funclet? 
// We can ignore the codePos part of the location, since it doesn't affect the @@ -2980,7 +3016,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) comp->unwindNop(id->idCodeSize()); #elif defined(TARGET_ARM64) comp->unwindNop(); -#endif // defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + comp->unwindNop(); +#endif // defined(TARGET_LOONGARCH64) } /***************************************************************************** @@ -2994,7 +3032,7 @@ void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp) emitWalkIDs(locFrom, emitGenerateUnwindNop, comp); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #if defined(TARGET_ARM) @@ -3377,6 +3415,9 @@ const size_t hexEncodingSize = 19; #elif defined(TARGET_ARM) const size_t basicIndent = 12; const size_t hexEncodingSize = 11; +#elif defined(TARGET_LOONGARCH64) +const size_t basicIndent = 12; +const size_t hexEncodingSize = 19; #endif #ifdef DEBUG @@ -4093,6 +4134,19 @@ void emitter::emitJumpDistBind() int jmp_iteration = 1; +#ifdef TARGET_LOONGARCH64 + //NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = B_DIST_SMALL_MAX_POS - emitCounts_INS_OPTS_J * (3 << 2);//the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). +#endif + /*****************************************************************************/ /* If we iterate to look for more jumps to shorten, we start again here. 
*/ /*****************************************************************************/ @@ -4129,9 +4183,11 @@ void emitter::emitJumpDistBind() UNATIVE_OFFSET jsz; // size of the jump instruction in bytes +#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET ssz = 0; // small jump size NATIVE_OFFSET nsd = 0; // small jump max. neg distance NATIVE_OFFSET psd = 0; // small jump max. pos distance +#endif #if defined(TARGET_ARM) UNATIVE_OFFSET msz = 0; // medium jump size @@ -4250,7 +4306,14 @@ void emitter::emitJumpDistBind() /* Make sure the jumps are properly ordered */ #ifdef DEBUG +#if defined(TARGET_LOONGARCH64) +#if defined(UNALIGNED_CHECK_DISABLE) + UNALIGNED_CHECK_DISABLE; +#endif + assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < (jmp->idjOffs + adjLJ)); +#else assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs); +#endif lastLJ = (lastIG == jmp->idjIG) ? jmp : nullptr; assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || @@ -4284,10 +4347,19 @@ void emitter::emitJumpDistBind() if (EMITVERBOSE) { printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, - lstIG->igOffs - adjIG); +#if defined(TARGET_LOONGARCH64) + lstIG->igOffs + adjIG +#else + lstIG->igOffs - adjIG +#endif + ); } #endif // DEBUG +#if defined(TARGET_LOONGARCH64) + lstIG->igOffs += adjIG; +#else lstIG->igOffs -= adjIG; +#endif assert(IsCodeAligned(lstIG->igOffs)); } while (lstIG != jmpIG); } @@ -4300,7 +4372,11 @@ void emitter::emitJumpDistBind() /* Apply any local size adjustment to the jump's relative offset */ +#if defined(TARGET_LOONGARCH64) + jmp->idjOffs += adjLJ; +#else jmp->idjOffs -= adjLJ; +#endif // If this is a jump via register, the instruction size does not change, so we are done. 
CLANG_FORMAT_COMMENT_ANCHOR; @@ -4348,8 +4424,9 @@ void emitter::emitJumpDistBind() if (jmp->idjShort) { +#ifndef TARGET_LOONGARCH64 assert(jmp->idCodeSize() == ssz); - +#endif // We should not be jumping/branching across funclets/functions emitCheckFuncletBranch(jmp, jmpIG); @@ -4459,7 +4536,11 @@ void emitter::emitJumpDistBind() here and the target could be shortened, causing the actual distance to shrink. */ +#if defined(TARGET_LOONGARCH64) + dstOffs += adjIG; +#else dstOffs -= adjIG; +#endif /* Compute the distance estimate */ @@ -4494,11 +4575,66 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT +#if defined(TARGET_LOONGARCH64) + assert(jmpDist >= 0);//Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + extra = 8; + } + } + else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + continue; + + extra = 4; + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + } + else //if (ins == INS_b || ins == INS_bl) + { + assert(ins == INS_b || ins == INS_bl); + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
+ adjLJ += extra; + adjIG += extra; + emitTotalCodeSize += extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + +#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } +#endif } else { @@ -4537,13 +4673,69 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT +#if defined(TARGET_LOONGARCH64) + assert(jmpDist >= 0);//Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist);//Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + extra = 8; + } + } + else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + continue; + + extra = 4; + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + } + else //if (ins == INS_b || ins == INS_bl) + { + assert(ins == INS_b || ins == INS_bl); + //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
+ adjLJ += extra; + adjIG += extra; + emitTotalCodeSize += extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + +#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } +#endif } +#ifndef TARGET_LOONGARCH64 /* We arrive here if the jump couldn't be made short, at least for now */ /* We had better not have eagerly marked the jump as short @@ -4675,6 +4867,8 @@ void emitter::emitJumpDistBind() // The size of IF_LARGEJMP/IF_LARGEADR/IF_LARGELDC are 8 or 12. // All other code size is 4. assert((sizeDif == 4) || (sizeDif == 8)); +#elif defined(TARGET_LOONGARCH64) + assert(sizeDif == 0); #else #error Unsupported or unset target architecture #endif @@ -4735,8 +4929,40 @@ void emitter::emitJumpDistBind() jmpIG->igFlags |= IGF_UPD_ISZ; +#endif // not defined(TARGET_LOONGARCH64) } // end for each jump +#if defined(TARGET_LOONGARCH64) + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + //indicating had updated the instrDescJmp's size with the type INS_OPTS_J. + isLinkingEnd_LA = 0x2; + //emitRecomputeIGoffsets(); + /* Adjust offsets of any remaining blocks */ + + for (;lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#else /* Did we shorten any jumps? 
*/ if (adjIG) @@ -4800,6 +5026,8 @@ void emitter::emitJumpDistBind() goto AGAIN; } } +#endif + #ifdef DEBUG if (EMIT_INSTLIST_VERBOSE) { @@ -5620,6 +5848,11 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { +#ifdef TARGET_LOONGARCH64 + /* TODO: for LOONGARCH64: not support idDebugOnlyInfo.*/ + return; +#else + #ifdef DEBUG // We should not be jumping/branching across funclets/functions // Except possibly a 'call' to a finally funclet for a local unwind @@ -5715,6 +5948,7 @@ void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) } } #endif // DEBUG +#endif } /***************************************************************************** @@ -6563,6 +6797,11 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, ig->igSize = (unsigned short)(cp - bp); } +#ifdef TARGET_LOONGARCH64 + //cp = cp - 4; + unsigned actualCodeSize = cp - codeBlock; +#endif + #if EMIT_TRACK_STACK_DEPTH assert(emitCurStackLvl == 0); #endif @@ -6603,6 +6842,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, emitUpdateLiveGCregs(GCT_GCREF, RBM_NONE, cp); } +#ifndef TARGET_LOONGARCH64 /* Patch any forward jumps */ if (emitFwdJumps) @@ -6687,6 +6927,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } +#endif //!TARGET_LOONGARCH64 #ifdef DEBUG if (emitComp->opts.disAsm) @@ -6695,7 +6936,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif +#ifndef TARGET_LOONGARCH64 unsigned actualCodeSize = emitCurCodeOffs(cp); +#endif #if defined(TARGET_ARM64) assert(emitTotalCodeSize == actualCodeSize); @@ -6786,6 +7029,13 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ +#if defined(TARGET_LOONGARCH64) +unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) +{ + assert(!"unimplemented yet on LOONGARCH"); + return -1; +} +#else unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) { 
instrDesc* id = (instrDesc*)ig->igData; @@ -6814,6 +7064,7 @@ unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) assert(!"emitFindInsNum failed"); return -1; } +#endif /***************************************************************************** * @@ -9264,13 +9515,14 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) // This uses and defs RDI and RSI. result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); #endif +#if !defined(TARGET_LOONGARCH64) case CORINFO_HELP_PROF_FCN_ENTER: result = RBM_PROFILER_ENTER_TRASH; break; @@ -9287,8 +9539,9 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) case CORINFO_HELP_PROF_FCN_TAILCALL: result = RBM_PROFILER_TAILCALL_TRASH; break; +#endif // !defined(TARGET_LOONGARCH64) -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: result = RBM_CALLEE_GCTRASH_WRITEBARRIER; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index ade4f7c3ca2c1..00124e42cac92 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -304,7 +304,11 @@ struct insGroup unsigned igStkLvl; // stack level on entry #endif regMaskSmall igGCregs; // set of registers with live GC refs +#ifdef TARGET_LOONGARCH64 + unsigned int igInsCnt; // # of instructions in this group +#else unsigned char igInsCnt; // # of instructions in this group +#endif #else // REGMASK_BITS @@ -344,8 +348,11 @@ struct insGroup ptr -= sizeof(VARSET_TP); } +#if defined(TARGET_LOONGARCH64) + ptr -= sizeof(VARSET_TP); +#else ptr -= sizeof(unsigned); - +#endif return *(unsigned*)ptr; } @@ -586,6 +593,10 @@ class emitter #define INSTR_ENCODED_SIZE 4 static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; +#elif 
defined(TARGET_LOONGARCH64) + /* TODO: not include SIMD-vector. */ + static_assert_no_msg(INS_count <= 512); + instruction _idIns : 9; #else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; @@ -595,6 +606,10 @@ class emitter #if defined(TARGET_XARCH) static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; +#elif defined(TARGET_LOONGARCH64) + //insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the _idInsCount. + //unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -611,6 +626,15 @@ class emitter _idIns = ins; } +#if defined(TARGET_LOONGARCH64) + insFormat idInsFmt() const + {//not used for LOONGARCH64. + return (insFormat)0; + } + void idInsFmt(insFormat insFmt) + { + } +#else insFormat idInsFmt() const { return _idInsFmt; @@ -623,6 +647,7 @@ class emitter assert(insFmt < IF_COUNT); _idInsFmt = insFmt; } +#endif void idSetRelocFlags(emitAttr attr) { @@ -636,6 +661,7 @@ class emitter // amd64: 17 bits // arm: 16 bits // arm64: 17 bits + //loongarch64: 14 bits private: #if defined(TARGET_XARCH) @@ -645,6 +671,8 @@ class emitter // doesn't cross a byte boundary. #elif defined(TARGET_ARM64) // Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields +#elif defined(TARGET_LOONGARCH64) + /* _idOpSize defined bellow. */ #else // ARM opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 #endif // ARM @@ -695,6 +723,13 @@ class emitter unsigned _idLclVar : 1; // access a local on stack #endif +#ifdef TARGET_LOONGARCH64 + /* TODO: for LOONGARCH: maybe delete on future. 
*/ + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the accessing a local on stack. + unsigned _idLclVar : 1; // access a local on stack. +#endif + #ifdef TARGET_ARM insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits insFlags _idInsFlags : 1; // will this instruction set the flags @@ -711,6 +746,9 @@ class emitter #elif defined(TARGET_XARCH) // For xarch, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) +#elif defined(TARGET_LOONGARCH64) +// For Loongarch64, we have used 14 bits from the second DWORD. +#define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture #endif @@ -721,6 +759,7 @@ class emitter // amd64: 46 bits // arm: 48 bits // arm64: 49 bits + //loongarch64: 46 bits unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag @@ -733,6 +772,7 @@ class emitter // amd64: 48 bits // arm: 50 bits // arm64: 51 bits + // loongarch64: 48 bits CLANG_FORMAT_COMMENT_ANCHOR; #define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS) @@ -810,7 +850,7 @@ class emitter // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts // about reading what we think is here, to avoid unexpected corruption issues. -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) emitLclVarAddr iiaLclVar; #endif BasicBlock* iiaBBlabel; @@ -862,7 +902,38 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#endif // defined(TARGET_XARCH) +#elif defined(TARGET_LOONGARCH64) // TARGET_XARCH + struct + { + unsigned int iiaEncodedInstr;//instruction's binary encoding. + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; + }; + + struct + { + int iiaJmpOffset;//temporary saving the offset of jmp or data. 
+ emitLclVarAddr iiaLclVar; + }; + + void iiaSetInstrEncode(unsigned int encode) + { + iiaEncodedInstr = encode; + } + unsigned int iiaGetInstrEncode() const + { + return iiaEncodedInstr; + } + + void iiaSetJmpOffset(int offset) + { + iiaJmpOffset = offset; + } + int iiaGetJmpOffset() const + { + return iiaJmpOffset; + } +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; @@ -962,10 +1033,21 @@ class emitter _idInsFlags = sf; assert(sf == _idInsFlags); } -#endif // TARGET_ARM - emitAttr idOpSize() +#elif defined(TARGET_LOONGARCH64) + unsigned idCodeSize() const + { + return _idCodeSize;//_idInsCount; + } + void idCodeSize(unsigned sz) { + assert(sz < 32); + _idCodeSize = sz; + } +#endif // TARGET_LOONGARCH64 + + emitAttr idOpSize() + {//NOTE: not used for LOONGARCH64. return emitDecodeSize(_idOpSize); } void idOpSize(emitAttr opsz) @@ -1087,6 +1169,42 @@ class emitter #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 + insOpts idInsOpt() const + { + return (insOpts)_idInsOpt; + } + void idInsOpt(insOpts opt) + { + _idInsOpt = opt; + assert(opt == _idInsOpt); + } + + regNumber idReg3() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg3; + } + void idReg3(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg3 = reg; + assert(reg == idAddr()->_idReg3); + } + regNumber idReg4() const + { + assert(!idIsSmallDsc()); + return idAddr()->_idReg4; + } + void idReg4(regNumber reg) + { + assert(!idIsSmallDsc()); + idAddr()->_idReg4 = reg; + assert(reg == idAddr()->_idReg4); + } + +#endif // TARGET_LOONGARCH64 + inline static bool fitsInSmallCns(ssize_t val) { return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS)); @@ -1175,6 +1293,17 @@ class emitter } #endif // defined(TARGET_ARM) +#ifdef TARGET_LOONGARCH64 + bool idIsLclVar() const + { + return _idLclVar != 0; + } + void idSetIsLclVar() + { + _idLclVar = 1; + } +#endif // TARGET_LOONGARCH64 + bool idIsCnsReloc() const { return _idCnsReloc != 0; @@ -1325,6 +1454,23 @@ class emitter 
#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C #define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C +#elif defined(TARGET_LOONGARCH64) +// a read,write or modify from stack location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C + +// a read, write or modify from constant location, possible def to use latency from L0 cache +#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C +#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C + +// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache +// plus an extra cost (of 1.0) for a increased chance of a cache miss +#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C +#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C +#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C + #endif // TARGET_XXX // Make this an enum: @@ -1735,6 +1881,10 @@ class emitter #endif // defined(TARGET_X86) #endif // !defined(HOST_64BIT) +#ifdef TARGET_LOONGARCH64 + unsigned int emitCounts_INS_OPTS_J;//INS_OPTS_J +#endif // defined(TARGET_LOONGARCH64) + size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); @@ -1783,9 +1933,18 @@ class emitter // ugly code like "movw r10, 0x488; add r10, sp; vstr s0, [r10]" for each store, which // eats up our insGroup buffer. 
#define SC_IG_BUFFER_SIZE (100 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#else // !TARGET_ARMARCH + +#elif defined(TARGET_LOONGARCH64) + +#ifdef DEBUG +#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) +#else +#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 20 * SMALL_IDSC_SIZE) +#endif + +#else // !TARGET_LOONGARCH64 #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#endif // !TARGET_ARMARCH +#endif // !TARGET_LOONGARCH64 size_t emitIGbuffSize; @@ -1981,7 +2140,19 @@ class emitter static void emitGenerateUnwindNop(instrDesc* id, void* context); -#endif // TARGET_ARMARCH +#elif defined(TARGET_LOONGARCH64) + void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); + bool emitGetLocationInfo(emitLocation* emitLoc, insGroup** pig, instrDesc** pid, int* pinsRemaining = NULL); + + bool emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining); + + typedef void (*emitProcessInstrFunc_t)(instrDesc* id, void* context); + + void emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context); + + static void emitGenerateUnwindNop(instrDesc* id, void* context); + +#endif // TARGET_LOONGARCH64 #ifdef TARGET_X86 void emitMarkStackLvl(unsigned stackLevel); @@ -2172,7 +2343,10 @@ class emitter // Returns "true" if instruction "id->idIns()" writes to a LclVar stack slot pair. 
bool emitInsWritesToLclVarStackLocPair(instrDesc* id); -#endif // TARGET_ARMARCH +#elif defined(TARGET_LOONGARCH64) + bool emitInsMayWriteToGCReg(instruction ins); + bool emitInsWritesToLclVarStackLoc(instrDesc* id); +#endif // TARGET_LOONGARCH64 /************************************************************************/ /* The following is used to distinguish helper vs non-helper calls */ diff --git a/src/coreclr/jit/emitdef.h b/src/coreclr/jit/emitdef.h index c9f003ccce1b6..35b46314a1225 100644 --- a/src/coreclr/jit/emitdef.h +++ b/src/coreclr/jit/emitdef.h @@ -12,6 +12,8 @@ #include "emitarm.h" #elif defined(TARGET_ARM64) #include "emitarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitloongarch64.h" #else #error Unsupported or unset target architecture #endif diff --git a/src/coreclr/jit/emitfmts.h b/src/coreclr/jit/emitfmts.h index c252c0b1237d3..77712ed95cce3 100644 --- a/src/coreclr/jit/emitfmts.h +++ b/src/coreclr/jit/emitfmts.h @@ -8,6 +8,8 @@ #include "emitfmtsarm.h" #elif defined(TARGET_ARM64) #include "emitfmtsarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "emitfmtsloongarch64.h" #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h new file mode 100644 index 0000000000000..b4232269b144f --- /dev/null +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -0,0 +1,82 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +////////////////////////////////////////////////////////////////////////////// + +// clang-format off +#if !defined(TARGET_LOONGARCH64) +#error Unexpected target type +#endif + +#ifdef DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#undef DEFINE_ID_OPS +enum ID_OPS +{ + ID_OP_NONE, // no additional arguments + ID_OP_SCNS, // small const operand (21-bits or less, no reloc) + ID_OP_JMP, // local jump + ID_OP_CALL, // method call + ID_OP_SPEC, // special handling required +}; + +////////////////////////////////////////////////////////////////////////////// +#else // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// + +#ifndef IF_DEF +#error Must define IF_DEF macro before including this file +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// enum insFormat instruction enum ID_OPS +// scheduling +// (unused) +////////////////////////////////////////////////////////////////////////////// + +IF_DEF(NONE, IS_NONE, NONE) // + + +//IF_DEF(LABEL, IS_NONE, JMP) // label +//IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op (cond branch + uncond branch) +//IF_DEF(LARGEADR, IS_NONE, JMP) // large address pseudo-op (adrp + add) +//IF_DEF(LARGELDC, IS_NONE, JMP) // large constant pseudo-op (adrp + ldr) + + +IF_DEF(OPCODE, IS_NONE, NONE) +IF_DEF(OPCODES_16, IS_NONE, NONE) +IF_DEF(OP_FMT, IS_NONE, NONE) +IF_DEF(OP_FMT_16, IS_NONE, NONE) +IF_DEF(OP_FMTS_16, IS_NONE, NONE) +IF_DEF(FMT_FUNC, IS_NONE, NONE) +IF_DEF(FMT_FUNC_6, IS_NONE, NONE) +IF_DEF(FMT_FUNC_16, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_6, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_16, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_6A, IS_NONE, NONE) +IF_DEF(FMT_FUNCS_11A, IS_NONE, NONE) +IF_DEF(FUNC, IS_NONE, NONE) +IF_DEF(FUNC_6, IS_NONE, NONE) +IF_DEF(FUNC_16, IS_NONE, NONE) +IF_DEF(FUNC_21, IS_NONE, NONE) +IF_DEF(FUNCS_6, IS_NONE, NONE) +IF_DEF(FUNCS_6A, IS_NONE, NONE) +IF_DEF(FUNCS_6B, 
IS_NONE, NONE) +IF_DEF(FUNCS_6C, IS_NONE, NONE) +IF_DEF(FUNCS_6D, IS_NONE, NONE) +IF_DEF(FUNCS_6E, IS_NONE, NONE) +IF_DEF(FUNCS_11, IS_NONE, NONE) + + +////////////////////////////////////////////////////////////////////////////// +#undef IF_DEF +////////////////////////////////////////////////////////////////////////////// + +#endif // !DEFINE_ID_OPS +////////////////////////////////////////////////////////////////////////////// +// clang-format on diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 484eca3399b4e..82c78299efebd 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -335,6 +335,36 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) id->idReg2((regNumber)encodeMask); // Save in idReg2 +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = 0; + + if ((regmask & RBM_S0) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S1) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S2) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S3) != RBM_NONE) + encodeMask |= 0x08; + if ((regmask & RBM_S4) != RBM_NONE) + encodeMask |= 0x10; + + id->idReg1((regNumber)encodeMask); // Save in idReg1 + + encodeMask = 0; + + if ((regmask & RBM_S5) != RBM_NONE) + encodeMask |= 0x01; + if ((regmask & RBM_S6) != RBM_NONE) + encodeMask |= 0x02; + if ((regmask & RBM_S7) != RBM_NONE) + encodeMask |= 0x04; + if ((regmask & RBM_S8) != RBM_NONE) + encodeMask |= 0x08; + + id->idReg2((regNumber)encodeMask); // Save in idReg2 + #else NYI("unknown target"); #endif @@ -447,6 +477,32 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id) if ((encodeMask & 0x10) != 0) regmask |= RBM_R28; +#elif defined(TARGET_LOONGARCH64) + assert(REGNUM_BITS >= 5); + encodeMask = id->idReg1(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S0; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S1; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S2; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S3; + if 
((encodeMask & 0x10) != 0) + regmask |= RBM_S4; + + encodeMask = id->idReg2(); + + if ((encodeMask & 0x01) != 0) + regmask |= RBM_S5; + if ((encodeMask & 0x02) != 0) + regmask |= RBM_S6; + if ((encodeMask & 0x04) != 0) + regmask |= RBM_S7; + if ((encodeMask & 0x08) != 0) + regmask |= RBM_S8; + #else NYI("unknown target"); #endif diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 4ed340302119d..0a19c7cbf138e 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -46,6 +46,26 @@ JMP_SMALL(lt , ge , blt ) // LT JMP_SMALL(gt , le , bgt ) // GT JMP_SMALL(le , gt , ble ) // LE +#elif defined(TARGET_LOONGARCH64) + +/* TODO for LOONGARCH: should redesign!!! */ +// jump reverse instruction condcode +JMP_SMALL(jmp , jmp , b ) // AL always +JMP_SMALL(eq , ne , beq ) // EQ +JMP_SMALL(ne , eq , bne ) // NE +//JMP_SMALL(hs , lo , bgez ) // HS also CS +//JMP_SMALL(lo , hs , bltz ) // LO also CC +//JMP_SMALL(mi , pl , bmi ) // MI +//JMP_SMALL(pl , mi , bpl ) // PL +//JMP_SMALL(vs , vc , bvs ) // VS +//JMP_SMALL(vc , vs , bvc ) // VC +//JMP_SMALL(hi , ls , bhi ) // HI +//JMP_SMALL(ls , hi , bls ) // LS +//JMP_SMALL(gez , ltz , bgez ) // GE +//JMP_SMALL(gtz , lez , bgtz ) // GT +//JMP_SMALL(ltz , gez , bltz ) // LT +//JMP_SMALL(lez , gtz , blez ) // LE + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp new file mode 100644 index 0000000000000..a5492bee3394b --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -0,0 +1,6780 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information.//emitarm64.cpp deletes this line. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX emitloongarch64.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +/*****************************************************************************/ +/*****************************************************************************/ + +#include "instr.h" +#include "emit.h" +#include "codegen.h" + +////These are used for loongarch64 instrs's dump. +////LA_OP_2R opcode: bit31 ~ bit10 +#define LA_2R_CLO_W 0x4 +#define LA_2R_CLZ_W 0x5 +#define LA_2R_CTO_W 0x6 +#define LA_2R_CTZ_W 0x7 +#define LA_2R_CLO_D 0x8 +#define LA_2R_CLZ_D 0x9 +#define LA_2R_CTO_D 0xa +#define LA_2R_CTZ_D 0xb +#define LA_2R_REVB_2H 0xc +#define LA_2R_REVB_4H 0xd +#define LA_2R_REVB_2W 0xe +#define LA_2R_REVB_D 0xf +#define LA_2R_REVH_2W 0x10 +#define LA_2R_REVH_D 0x11 +#define LA_2R_BITREV_4B 0x12 +#define LA_2R_BITREV_8B 0x13 +#define LA_2R_BITREV_W 0x14 +#define LA_2R_BITREV_D 0x15 +#define LA_2R_EXT_W_H 0x16 +#define LA_2R_EXT_W_B 0x17 +#define LA_2R_RDTIMEL_W 0x18 +#define LA_2R_RDTIMEH_W 0x19 +#define LA_2R_RDTIME_D 0x1a +#define LA_2R_CPUCFG 0x1b +#define LA_2R_ASRTLE_D 0x2 +#define LA_2R_ASRTGT_D 0x3 +#define LA_2R_FABS_S 0x4501 +#define LA_2R_FABS_D 0x4502 +#define LA_2R_FNEG_S 0x4505 +#define LA_2R_FNEG_D 0x4506 +#define LA_2R_FLOGB_S 0x4509 +#define LA_2R_FLOGB_D 0x450a +#define LA_2R_FCLASS_S 0x450d +#define LA_2R_FCLASS_D 0x450e +#define LA_2R_FSQRT_S 0x4511 +#define LA_2R_FSQRT_D 0x4512 +#define LA_2R_FRECIP_S 0x4515 +#define LA_2R_FRECIP_D 0x4516 +#define LA_2R_FRSQRT_S 0x4519 +#define LA_2R_FRSQRT_D 0x451a +#define LA_2R_FMOV_S 0x4525 +#define LA_2R_FMOV_D 0x4526 +#define 
LA_2R_MOVGR2FR_W 0x4529 +#define LA_2R_MOVGR2FR_D 0x452a +#define LA_2R_MOVGR2FRH_W 0x452b +#define LA_2R_MOVFR2GR_S 0x452d +#define LA_2R_MOVFR2GR_D 0x452e +#define LA_2R_MOVFRH2GR_S 0x452f +#define LA_2R_MOVGR2FCSR 0x4530 +#define LA_2R_MOVFCSR2GR 0x4532 +#define LA_2R_MOVFR2CF 0x4534 +#define LA_2R_MOVCF2FR 0x4535 +#define LA_2R_MOVGR2CF 0x4536 +#define LA_2R_MOVCF2GR 0x4537 +#define LA_2R_FCVT_S_D 0x4646 +#define LA_2R_FCVT_D_S 0x4649 +#define LA_2R_FTINTRM_W_S 0x4681 +#define LA_2R_FTINTRM_W_D 0x4682 +#define LA_2R_FTINTRM_L_S 0x4689 +#define LA_2R_FTINTRM_L_D 0x468a +#define LA_2R_FTINTRP_W_S 0x4691 +#define LA_2R_FTINTRP_W_D 0x4692 +#define LA_2R_FTINTRP_L_S 0x4699 +#define LA_2R_FTINTRP_L_D 0x469a +#define LA_2R_FTINTRZ_W_S 0x46a1 +#define LA_2R_FTINTRZ_W_D 0x46a2 +#define LA_2R_FTINTRZ_L_S 0x46a9 +#define LA_2R_FTINTRZ_L_D 0x46aa +#define LA_2R_FTINTRNE_W_S 0x46b1 +#define LA_2R_FTINTRNE_W_D 0x46b2 +#define LA_2R_FTINTRNE_L_S 0x46b9 +#define LA_2R_FTINTRNE_L_D 0x46ba +#define LA_2R_FTINT_W_S 0x46c1 +#define LA_2R_FTINT_W_D 0x46c2 +#define LA_2R_FTINT_L_S 0x46c9 +#define LA_2R_FTINT_L_D 0x46ca +#define LA_2R_FFINT_S_W 0x4744 +#define LA_2R_FFINT_S_L 0x4746 +#define LA_2R_FFINT_D_W 0x4748 +#define LA_2R_FFINT_D_L 0x474a +#define LA_2R_FRINT_S 0x4791 +#define LA_2R_FRINT_D 0x4792 +#define LA_2R_IOCSRRD_B 0x19200 +#define LA_2R_IOCSRRD_H 0x19201 +#define LA_2R_IOCSRRD_W 0x19202 +#define LA_2R_IOCSRRD_D 0x19203 +#define LA_2R_IOCSRWR_B 0x19204 +#define LA_2R_IOCSRWR_H 0x19205 +#define LA_2R_IOCSRWR_W 0x19206 +#define LA_2R_IOCSRWR_D 0x19207 + +////LA_OP_3R opcode: bit31 ~ bit15 +#define LA_3R_ADD_W 0x20 +#define LA_3R_ADD_D 0x21 +#define LA_3R_SUB_W 0x22 +#define LA_3R_SUB_D 0x23 +#define LA_3R_SLT 0x24 +#define LA_3R_SLTU 0x25 +#define LA_3R_MASKEQZ 0x26 +#define LA_3R_MASKNEZ 0x27 +#define LA_3R_NOR 0x28 +#define LA_3R_AND 0x29 +#define LA_3R_OR 0x2a +#define LA_3R_XOR 0x2b +#define LA_3R_ORN 0x2c +#define LA_3R_ANDN 0x2d +#define LA_3R_SLL_W 0x2e +#define 
LA_3R_SRL_W 0x2f +#define LA_3R_SRA_W 0x30 +#define LA_3R_SLL_D 0x31 +#define LA_3R_SRL_D 0x32 +#define LA_3R_SRA_D 0x33 +#define LA_3R_ROTR_W 0x36 +#define LA_3R_ROTR_D 0x37 +#define LA_3R_MUL_W 0x38 +#define LA_3R_MULH_W 0x39 +#define LA_3R_MULH_WU 0x3a +#define LA_3R_MUL_D 0x3b +#define LA_3R_MULH_D 0x3c +#define LA_3R_MULH_DU 0x3d +#define LA_3R_MULW_D_W 0x3e +#define LA_3R_MULW_D_WU 0x3f +#define LA_3R_DIV_W 0x40 +#define LA_3R_MOD_W 0x41 +#define LA_3R_DIV_WU 0x42 +#define LA_3R_MOD_WU 0x43 +#define LA_3R_DIV_D 0x44 +#define LA_3R_MOD_D 0x45 +#define LA_3R_DIV_DU 0x46 +#define LA_3R_MOD_DU 0x47 +#define LA_3R_CRC_W_B_W 0x48 +#define LA_3R_CRC_W_H_W 0x49 +#define LA_3R_CRC_W_W_W 0x4a +#define LA_3R_CRC_W_D_W 0x4b +#define LA_3R_CRCC_W_B_W 0x4c +#define LA_3R_CRCC_W_H_W 0x4d +#define LA_3R_CRCC_W_W_W 0x4e +#define LA_3R_CRCC_W_D_W 0x4f +#define LA_3R_FADD_S 0x201 +#define LA_3R_FADD_D 0x202 +#define LA_3R_FSUB_S 0x205 +#define LA_3R_FSUB_D 0x206 +#define LA_3R_FMUL_S 0x209 +#define LA_3R_FMUL_D 0x20a +#define LA_3R_FDIV_S 0x20d +#define LA_3R_FDIV_D 0x20e +#define LA_3R_FMAX_S 0x211 +#define LA_3R_FMAX_D 0x212 +#define LA_3R_FMIN_S 0x215 +#define LA_3R_FMIN_D 0x216 +#define LA_3R_FMAXA_S 0x219 +#define LA_3R_FMAXA_D 0x21a +#define LA_3R_FMINA_S 0x21d +#define LA_3R_FMINA_D 0x21e +#define LA_3R_FSCALEB_S 0x221 +#define LA_3R_FSCALEB_D 0x222 +#define LA_3R_FCOPYSIGN_S 0x225 +#define LA_3R_FCOPYSIGN_D 0x226 +#define LA_3R_INVTLB 0xc91 +#define LA_3R_LDX_B 0x7000 +#define LA_3R_LDX_H 0x7008 +#define LA_3R_LDX_W 0x7010 +#define LA_3R_LDX_D 0x7018 +#define LA_3R_STX_B 0x7020 +#define LA_3R_STX_H 0x7028 +#define LA_3R_STX_W 0x7030 +#define LA_3R_STX_D 0x7038 +#define LA_3R_LDX_BU 0x7040 +#define LA_3R_LDX_HU 0x7048 +#define LA_3R_LDX_WU 0x7050 +#define LA_3R_PRELDX 0x7058 +#define LA_3R_FLDX_S 0x7060 +#define LA_3R_FLDX_D 0x7068 +#define LA_3R_FSTX_S 0x7070 +#define LA_3R_FSTX_D 0x7078 +#define LA_3R_AMSWAP_W 0x70c0 +#define LA_3R_AMSWAP_D 0x70c1 +#define 
LA_3R_AMADD_W 0x70c2 +#define LA_3R_AMADD_D 0x70c3 +#define LA_3R_AMAND_W 0x70c4 +#define LA_3R_AMAND_D 0x70c5 +#define LA_3R_AMOR_W 0x70c6 +#define LA_3R_AMOR_D 0x70c7 +#define LA_3R_AMXOR_W 0x70c8 +#define LA_3R_AMXOR_D 0x70c9 +#define LA_3R_AMMAX_W 0x70ca +#define LA_3R_AMMAX_D 0x70cb +#define LA_3R_AMMIN_W 0x70cc +#define LA_3R_AMMIN_D 0x70cd +#define LA_3R_AMMAX_WU 0x70ce +#define LA_3R_AMMAX_DU 0x70cf +#define LA_3R_AMMIN_WU 0x70d0 +#define LA_3R_AMMIN_DU 0x70d1 +#define LA_3R_AMSWAP_DB_W 0x70d2 +#define LA_3R_AMSWAP_DB_D 0x70d3 +#define LA_3R_AMADD_DB_W 0x70d4 +#define LA_3R_AMADD_DB_D 0x70d5 +#define LA_3R_AMAND_DB_W 0x70d6 +#define LA_3R_AMAND_DB_D 0x70d7 +#define LA_3R_AMOR_DB_W 0x70d8 +#define LA_3R_AMOR_DB_D 0x70d9 +#define LA_3R_AMXOR_DB_W 0x70da +#define LA_3R_AMXOR_DB_D 0x70db +#define LA_3R_AMMAX_DB_W 0x70dc +#define LA_3R_AMMAX_DB_D 0x70dd +#define LA_3R_AMMIN_DB_W 0x70de +#define LA_3R_AMMIN_DB_D 0x70df +#define LA_3R_AMMAX_DB_WU 0x70e0 +#define LA_3R_AMMAX_DB_DU 0x70e1 +#define LA_3R_AMMIN_DB_WU 0x70e2 +#define LA_3R_AMMIN_DB_DU 0x70e3 +#define LA_3R_FLDGT_S 0x70e8 +#define LA_3R_FLDGT_D 0x70e9 +#define LA_3R_FLDLE_S 0x70ea +#define LA_3R_FLDLE_D 0x70eb +#define LA_3R_FSTGT_S 0x70ec +#define LA_3R_FSTGT_D 0x70ed +#define LA_3R_FSTLE_S 0x70ee +#define LA_3R_FSTLE_D 0x70ef +#define LA_3R_LDGT_B 0x70f0 +#define LA_3R_LDGT_H 0x70f1 +#define LA_3R_LDGT_W 0x70f2 +#define LA_3R_LDGT_D 0x70f3 +#define LA_3R_LDLE_B 0x70f4 +#define LA_3R_LDLE_H 0x70f5 +#define LA_3R_LDLE_W 0x70f6 +#define LA_3R_LDLE_D 0x70f7 +#define LA_3R_STGT_B 0x70f8 +#define LA_3R_STGT_H 0x70f9 +#define LA_3R_STGT_W 0x70fa +#define LA_3R_STGT_D 0x70fb +#define LA_3R_STLE_B 0x70fc +#define LA_3R_STLE_H 0x70fd +#define LA_3R_STLE_W 0x70fe +#define LA_3R_STLE_D 0x70ff + +////LA_OP_4R opcode: bit31 ~ bit20 +#define LA_4R_FMADD_S 0x81 +#define LA_4R_FMADD_D 0x82 +#define LA_4R_FMSUB_S 0x85 +#define LA_4R_FMSUB_D 0x86 +#define LA_4R_FNMADD_S 0x89 +#define LA_4R_FNMADD_D 0x8a +#define 
LA_4R_FNMSUB_S 0x8d +#define LA_4R_FNMSUB_D 0x8e +#define LA_4R_FSEL 0xd0 + +////LA_OP_2RI8 + +////LA_OP_2RI12 opcode: bit31 ~ bit22 +#define LA_2RI12_SLTI 0x8 +#define LA_2RI12_SLTUI 0x9 +#define LA_2RI12_ADDI_W 0xa +#define LA_2RI12_ADDI_D 0xb +#define LA_2RI12_LU52I_D 0xc +#define LA_2RI12_ANDI 0xd +#define LA_2RI12_ORI 0xe +#define LA_2RI12_XORI 0xf +#define LA_2RI12_CACHE 0x18 +#define LA_2RI12_LD_B 0xa0 +#define LA_2RI12_LD_H 0xa1 +#define LA_2RI12_LD_W 0xa2 +#define LA_2RI12_LD_D 0xa3 +#define LA_2RI12_ST_B 0xa4 +#define LA_2RI12_ST_H 0xa5 +#define LA_2RI12_ST_W 0xa6 +#define LA_2RI12_ST_D 0xa7 +#define LA_2RI12_LD_BU 0xa8 +#define LA_2RI12_LD_HU 0xa9 +#define LA_2RI12_LD_WU 0xaa +#define LA_2RI12_PRELD 0xab +#define LA_2RI12_FLD_S 0xac +#define LA_2RI12_FST_S 0xad +#define LA_2RI12_FLD_D 0xae +#define LA_2RI12_FST_D 0xaf + +////LA_OP_2RI14i opcode: bit31 ~ bit24 +#define LA_2RI14_LL_W 0x20 +#define LA_2RI14_SC_W 0x21 +#define LA_2RI14_LL_D 0x22 +#define LA_2RI14_SC_D 0x23 +#define LA_2RI14_LDPTR_W 0x24 +#define LA_2RI14_STPTR_W 0x25 +#define LA_2RI14_LDPTR_D 0x26 +#define LA_2RI14_STPTR_D 0x27 + +////LA_OP_2RI16 opcode: bit31 ~ bit26 +#define LA_2RI16_ADDU16I_D 0x4 +#define LA_2RI16_JIRL 0x13 +#define LA_2RI16_BEQ 0x16 +#define LA_2RI16_BNE 0x17 +#define LA_2RI16_BLT 0x18 +#define LA_2RI16_BGE 0x19 +#define LA_2RI16_BLTU 0x1a +#define LA_2RI16_BGEU 0x1b + +////LA_OP_1RI20 opcode: bit31 ~ bit25 +#define LA_1RI20_LU12I_W 0xa +#define LA_1RI20_LU32I_D 0xb +#define LA_1RI20_PCADDI 0xc +#define LA_1RI20_PCALAU12I 0xd +#define LA_1RI20_PCADDU12I 0xe +#define LA_1RI20_PCADDU18I 0xf + +////LA_OP_I26 +#define LA_I26_B 0x14 +#define LA_I26_BL 0x15 + +////LA_OP_1RI21 +#define LA_1RI21_BEQZ 0x10 +#define LA_1RI21_BNEZ 0x11 +#define LA_1RI21_BCEQZ 0x12 +#define LA_1RI21_BCNEZ 0x12 + +////other +#define LA_OP_ALSL_W 0x1 +#define LA_OP_ALSL_WU 0x1 +#define LA_OP_ALSL_D 0xb +#define LA_OP_BYTEPICK_W 0x2 +#define LA_OP_BYTEPICK_D 0x3 +#define LA_OP_BREAK 0x54 +#define 
LA_OP_DBGCALL 0x55 +#define LA_OP_SYSCALL 0x56 +#define LA_OP_SLLI_W 0x10 +#define LA_OP_SLLI_D 0x10 +#define LA_OP_SRLI_W 0x11 +#define LA_OP_SRLI_D 0x11 +#define LA_OP_SRAI_W 0x12 +#define LA_OP_SRAI_D 0x12 +#define LA_OP_ROTRI_W 0x13 +#define LA_OP_ROTRI_D 0x13 +#define LA_OP_FCMP_cond_S 0xc1 +#define LA_OP_FCMP_cond_D 0xc2 +#define LA_OP_BSTRINS_W 0x1 +#define LA_OP_BSTRPICK_W 0x1 +#define LA_OP_BSTRINS_D 0x2 +#define LA_OP_BSTRPICK_D 0x3 +#define LA_OP_DBAR 0x70e4 +#define LA_OP_IBAR 0x70e5 + +//// add other define-macro here. + + +/*****************************************************************************/ + +const instruction emitJumpKindInstructions[] = { + INS_nop, + +#define JMP_SMALL(en, rev, ins) INS_##ins, +#include "emitjmps.h" +}; + +const emitJumpKind emitReverseJumpKinds[] = { + EJ_NONE, + +#define JMP_SMALL(en, rev, ins) EJ_##rev, +#include "emitjmps.h" +}; + +/***************************************************************************** + * The macro define for instructions. 
+ */ + +#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ + op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ + op0_code |= ((op3_imm) & 0xfff)<<10 + +#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg));/* rd */ \ + op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ + op0_code |= ((code_t)(op3_reg))<<10 /* rk */ + +#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg));/* rd */ \ + op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ + op0_code |= ((code_t)(op3_reg))<<10 /* rk */ + +#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ + op0_code |= ((op3_imm) & 0xffff)<<10 /* offs */ \ + +#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((op2_imm) & 0xfffff)<<5 /* si20 */ + +#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) \ + D_INST_lu12i_w(op0_code, op1_reg, op2_imm) + +#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) \ + D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) + +#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) \ + D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) + +//Load or Store instructions. 
+#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) \ + D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) + +#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ + op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ + assert(!((code_t)(op3_imm) & 0x3)); \ + op0_code |= (((code_t)(op3_imm)<<8) & 0x3fffc00) /* offset */ + +#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ + assert(!((code_t)(op1_imm) & 0x3)); \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ + op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00); \ + op0_code |= (((code_t)(op1_imm)>>18) & 0x1f) /* offset */ + +#define D_INST_B(op0_code, op1_imm) \ + assert(!((code_t)(op1_imm) & 0x3)); \ + op0_code |= (((code_t)(op1_imm)>>18) & 0x3ff); \ + op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00) /* offset */ + +/***************************************************************************** + * Look up the instruction for a jump kind + */ + +/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) +{ + assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); + return emitJumpKindInstructions[jumpKind]; +} + +/***************************************************************************** +* Look up the jump kind for an instruction. It better be a conditional +* branch instruction with a jump kind! 
+*/ + +/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) +{ +assert(!"unimplemented on LOONGARCH yet"); + return EJ_NONE; +#if 0 + for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) + { + if (ins == emitJumpKindInstructions[i]) + { + emitJumpKind ret = (emitJumpKind)i; + assert(EJ_NONE < ret && ret < EJ_COUNT); + return ret; + } + } + unreached(); +#endif +} + +/***************************************************************************** + * Reverse the conditional jump + */ + +/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) +{ + assert(jumpKind < EJ_COUNT); + return emitReverseJumpKinds[jumpKind]; +} + +/***************************************************************************** + * + * Return the allocated size (in bytes) of the given instruction descriptor. + */ + +size_t emitter::emitSizeOfInsDsc(instrDesc* id) +{ + if (emitIsScnsInsDsc(id)) + return SMALL_IDSC_SIZE; + + insOpts insOp = id->idInsOpt(); + + switch (insOp) + { + case INS_OPTS_JIRL: + case INS_OPTS_J_cond: + case INS_OPTS_J: + return sizeof(instrDescJmp); + + case INS_OPTS_C: + if (id->idIsLargeCall()) + { + /* Must be a "fat" call descriptor */ + return sizeof(instrDescCGCA); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); + return sizeof(instrDesc); + } + //break; + + case INS_OPTS_I: + case INS_OPTS_RC: + case INS_OPTS_RL: + case INS_OPTS_RELOC: + case INS_OPTS_NONE: + return sizeof(instrDesc); + default: + NO_WAY("unexpected instruction descriptor format"); + break; + } +} + +#ifdef DEBUG +/***************************************************************************** + * + * The following called for each recorded instruction -- use for debugging. + */ +void emitter::emitInsSanityCheck(instrDesc* id) +{ + /* What instruction format have we got? 
*/ + + switch (id->idInsFmt()) + { + case IF_OPCODE: + case IF_OPCODES_16: + case IF_OP_FMT: + case IF_OP_FMT_16: + case IF_OP_FMTS_16: + case IF_FMT_FUNC: + case IF_FMT_FUNC_6: + case IF_FMT_FUNC_16: + case IF_FMT_FUNCS_6: + case IF_FMT_FUNCS_16: + case IF_FMT_FUNCS_6A: + case IF_FMT_FUNCS_11A: + case IF_FUNC: + case IF_FUNC_6: + case IF_FUNC_16: + case IF_FUNC_21: + case IF_FUNCS_6: + case IF_FUNCS_6A: + case IF_FUNCS_6B: + case IF_FUNCS_6C: + case IF_FUNCS_6D: + case IF_FUNCS_11: + //case IF_LA: + break; + + default: + printf("unexpected format %s\n", emitIfName(id->idInsFmt())); + assert(!"Unexpected format"); + break; + } +} +#endif // DEBUG + +inline bool emitter::emitInsMayWriteToGCReg(instruction ins) +{ + assert(ins != INS_invalid); + ////NOTE: please reference the file "instrsloongarch64.h" for details !!! + return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; +} + +bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; + + instruction ins = id->idIns(); + + // This list is related to the list of instructions used to store local vars in emitIns_S_R(). + // We don't accept writing to float local vars. + + switch (ins) + { + case INS_st_d: + case INS_stptr_d: +/////// not used these instrs right now !!! 
+ //case INS_sc_d: + //case INS_stx_d: +//#ifdef DEBUG +// case INS_st_b: +// case INS_st_h: +// case INS_st_w: +// case INS_stx_b: +// case INS_stx_h: +// case INS_stx_w: +// //case INS_sc_w: +// //case INS_stgt_b: +// //case INS_stgt_h: +// //case INS_stgt_w: +// //case INS_stgt_d: +// //case INS_stle_b: +// //case INS_stle_h: +// //case INS_stle_w: +// //case INS_stle_d: +//#endif + return true; + default: + return false; + } +} + +/*****************************************************************************/ +#ifdef DEBUG + +// clang-format off +static const char * const RegNames[] = +{ + #define REGDEF(name, rnum, mask, xname, wname) xname, + #include "register.h" +}; +// clang-format on + +#endif // DEBUG + +#define LD 1 +#define ST 2 + +// clang-format off +/*static*/ const BYTE CodeGenInterface::instInfo[] = +{ + #define INSTS(id, nm, fp, info, fmt, e1) info, + #include "instrs.h" +}; +// clang-format on + +//------------------------------------------------------------------------ +// emitInsLoad: Returns true if the instruction is some kind of load instruction. +// +bool emitter::emitInsIsLoad(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LD) != 0; + else + return false; +} + +//------------------------------------------------------------------------ +//emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// +bool emitter::emitInsIsStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & ST) != 0; + else + return false; +} + +//------------------------------------------------------------------------- +//emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. 
+// +bool emitter::emitInsIsLoadOrStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; + else + return false; +} + +#undef LD +#undef ST + +/***************************************************************************** + * + * Returns the specific encoding of the given CPU instruction. + */ + +inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) +{ + code_t code = BAD_CODE; + + // clang-format off + const static code_t insCode[] = + { + #define INSTS(id, nm, fp, info, fmt, e1) e1, + #include "instrs.h" + }; + // clang-format on + + code = insCode[ins]; + + assert((code != BAD_CODE)); + + return code; +} + +/**************************************************************************** + * + * Add an instruction with no operands. + */ + +void emitter::emitIns(instruction ins) +{ + //instrDesc* id = emitNewInstrSmall(EA_8BYTE); + instrDesc* id = emitNewInstr(EA_8BYTE); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. + * For referencing a stack-based local variable and a register + */ +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + //assert(offs >= 0); + ssize_t imm; + + emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + +#ifdef DEBUG + switch (ins) + { + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_fst_s: + //case INS_swl: + //case INS_swr: + //case INS_sdl: + //case INS_sdr: + case INS_st_d: + case INS_fst_d: + break; + + default: + NYI("emitIns_S_R"); // FP locals? 
+ return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? -offs -8: base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? -offs -8: offs; + + if ((-2048 <= imm) && (imm < 2048)) + { + //regs[1] = reg2; + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + imm = imm3 ? imm2 - imm3 : imm2; + + reg2 = REG_RA; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + + id->idReg2(reg2); + + id->idIns(ins); + + code_t code = emitInsCode(ins); + D_INST_2RI12(code, (reg1 & 0x1f), reg2, imm); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + //assert(offs >= 0); + ssize_t imm; + + emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + +#ifdef DEBUG + switch (ins) + { + case INS_ld_b: + case INS_ld_bu: + + case INS_ld_h: + case INS_ld_hu: + + case INS_ld_w: + case INS_ld_wu: + case INS_fld_s: + + case INS_ld_d: + case INS_fld_d: + + //case INS_lwl: + //case INS_lwr: + + //case INS_ldl: + //case INS_ldr: + //assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); + break; + + case INS_lea: + assert(size == EA_8BYTE); + break; + + default: + NYI("emitIns_R_S"); // FP locals? + return; + + } // end switch (ins) +#endif + + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + imm = offs < 0 ? 
-offs -8: base + offs; + + regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? -offs -8: offs; + + reg1 = (regNumber)((char)reg1 & 0x1f); + code_t code; + if ((-2048 <= imm) && (imm < 2048)) + { + if (ins == INS_lea) + { + ins = INS_addi_d; + } + code = emitInsCode(ins); + D_INST_2RI12(code, reg1, reg2, imm); + } + else + { + if (ins == INS_lea) + { + assert(isValidSimm20(imm >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm >> 12); + ssize_t imm2 = imm & 0xfff; + emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_RA, REG_RA, imm2); + + ins = INS_add_d; + code = emitInsCode(ins); + D_INST_add_d(code, reg1, reg2, REG_RA); + } + else + { + ssize_t imm3 = imm & 0x800; + ssize_t imm2 = imm + imm3; + assert(isValidSimm20(imm2 >> 12)); + emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_RA, imm2 >> 12); + + emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); + + imm2 = imm2 & 0x7ff; + code = emitInsCode(ins); + D_INST_2RI12(code, reg1/* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); + } + //reg2 = REG_RA; + } + + instrDesc* id = emitNewInstr(attr); + + id->idReg1(reg1); + //id->idReg2(reg2);//not used. + + id->idIns(ins); + + id->idAddr()->iiaSetInstrEncode(code); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a single immediate value. 
+ */ + +void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) +{ + code_t code = emitInsCode(ins); + + switch (ins) + { + case INS_b: + case INS_bl: + assert(!(imm & 0x3)); + code |= ((imm>>18) & 0x3ff); //offs[25:16] + code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + break; + case INS_dbar: + case INS_ibar: + assert((0 <= imm) && (imm <= 0x7fff)); + code |= (imm & 0x7fff); //hint + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs) +{ +#ifdef DEBUG + switch (ins) + { + case INS_bceqz: + case INS_bcnez: + break; + //case INS_: + //case INS_: + // break; + + default: + unreached(); + } +#endif + + code_t code = emitInsCode(ins); + + assert(!(offs & 0x3)); + assert(!(cc >> 3)); + code |= ((cc & 0x7) << 5); //cj + code |= ((offs >> 18) & 0x1f); //offs[20:16] + code |= ((offs >> 2) & 0xffff)<<10;//offs[15:0] + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing a single register. 
+ */ + +void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + code_t code = emitInsCode(ins); + +#ifdef DEBUG +#endif + switch (ins) + { + case INS_jr: + case INS_jr_hb: + case INS_mthi: + case INS_mtlo: + code |= (reg & 0x1f)<<21;//rs + break; + + case INS_mfhi://mfhi + case INS_mflo: + code |= (reg & 0x1f)<<11;//rd + assert(isGeneralRegister(reg)); + break; + + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +#endif +} + +/***************************************************************************** + * + * Add an instruction referencing a register and a constant. + */ + +void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); +//#ifdef DEBUG + switch (ins) + { + case INS_lu12i_w: + case INS_lu32i_d: + case INS_pcaddi: + case INS_pcalau12i: + case INS_pcaddu12i: + case INS_pcaddu18i: + assert(isGeneralRegister(reg)); + assert((-524288 <= imm) && (imm < 524288)); + + code |= reg; //rd + code |= (imm & 0xfffff)<<5;//si20 + break; + case INS_beqz: + case INS_bnez: + assert(isGeneralRegisterOrR0(reg)); + assert(!(imm & 0x3)); + assert((-1048576 <= (imm>>2)) && ((imm>>2) <= 1048575)); + + code |= ((imm>>18) & 0x1f); //offs[20:16] + code |= reg << 5; //rj + code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + break; + case INS_movfr2cf: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f)<<5;//fj + code |= imm /*& 0x7*/; //cc + break; + case INS_movcf2fr: + assert(isFloatReg(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg & 0x1f);//fd + code |= (imm /*& 0x7*/)<<5; //cc + break; + case INS_movgr2cf: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg<<5;//rj + code 
|= imm /*& 0x7*/; //cc + break; + case INS_movcf2gr: + assert(isGeneralRegister(reg)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg;//rd + code |= (imm /*& 0x7*/)<<5; //cc + break; + default: + unreached(); + break; + } // end switch (ins) +//#endif + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +//NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. +// But I don't konw how to change it so that it can be used on LA. +// I just add a statement "assert(!"unimplemented on LOONGARCH yet");". +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) +{//TODO: should amend for LoongArch64/LOONGARCH64. 
+ assert(IsMovInstruction(ins)); + + emitIns_R_R(ins, attr, dstReg, srcReg); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ + +void emitter::emitIns_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if (INS_mov == ins) { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; //rd + code |= reg2<<5; //rj + } + else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) { + //case INS_ext_w_b: + //case INS_ext_w_h: + //case INS_clo_w: + //case INS_clz_w: + //case INS_cto_w: + //case INS_ctz_w: + //case INS_clo_d: + //case INS_clz_d: + //case INS_cto_d: + //case INS_ctz_d: + //case INS_revb_2h: + //case INS_revb_4h: + //case INS_revb_2w: + //case INS_revb_d: + //case INS_revh_2w: + //case INS_revh_d: + //case INS_bitrev_4b: + //case INS_bitrev_8b: + //case INS_bitrev_w: + //case INS_bitrev_d: + //case INS_rdtimel_w: + //case INS_rdtimeh_w: + //case INS_rdtime_d: + //case INS_cpucfg: + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1; //rd + code |= reg2 << 5;//rj + } + else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) { + //case INS_asrtle_d: + //case INS_asrtgt_d: + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= reg1 << 5; //rj + code |= reg2 << 10; //rk + } + else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) { + //case INS_fabs_s: + //case INS_fabs_d: + //case INS_fneg_s: + //case INS_fneg_d: + //case INS_fsqrt_s: + //case INS_fsqrt_d: + //case INS_frsqrt_s: + //case INS_frsqrt_d: + //case INS_frecip_s: + //case INS_frecip_d: + //case INS_flogb_s: + //case INS_flogb_d: + //case INS_fclass_s: + //case INS_fclass_d: + //case INS_fcvt_s_d: + //case INS_fcvt_d_s: + //case INS_ffint_s_w: + //case INS_ffint_s_l: + //case INS_ffint_d_w: + //case INS_ffint_d_l: + 
//case INS_ftint_w_s: + //case INS_ftint_w_d: + //case INS_ftint_l_s: + //case INS_ftint_l_d: + //case INS_ftintrm_w_s: + //case INS_ftintrm_w_d: + //case INS_ftintrm_l_s: + //case INS_ftintrm_l_d: + //case INS_ftintrp_w_s: + //case INS_ftintrp_w_d: + //case INS_ftintrp_l_s: + //case INS_ftintrp_l_d: + //case INS_ftintrz_w_s: + //case INS_ftintrz_w_d: + //case INS_ftintrz_l_s: + //case INS_ftintrz_l_d: + //case INS_ftintrne_w_s: + //case INS_ftintrne_w_d: + //case INS_ftintrne_l_s: + //case INS_ftintrne_l_d: + //case INS_frint_s: + //case INS_frint_d: + //case INS_fmov_s: + //case INS_fmov_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + } + else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) { + //case INS_movgr2fr_w: + //case INS_movgr2fr_d: + //case INS_movgr2frh_w: + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + code |= (reg1 & 0x1f); //fd + code |= reg2 << 5; //rj + } + else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) { + //case INS_movfr2gr_s: + //case INS_movfr2gr_d: + //case INS_movfrh2gr_s: + assert(isGeneralRegisterOrR0(reg1)); + assert(isFloatReg(reg2)); + code |= reg1; //rd + code |= (reg2 & 0x1f)<<5; //fj + } + else if ((INS_dneg == ins) || (INS_neg == ins)) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + //sub_d rd, zero, rk + //sub_w rd, zero, rk + code |= reg1; //rd + code |= reg2 << 10; //rk + } + else if (INS_not == ins) + { + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + //nor rd, rj, zero + code |= reg1; //rd + code |= reg2 << 5; //rj + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +void emitter::emitIns_R_I_I( + instruction ins, emitAttr attr, regNumber reg, ssize_t hint, 
ssize_t off, insOpts opt /* = INS_OPTS_NONE */) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 +#ifdef DEBUG + switch (ins) + { + case INS_pref: + assert(isGeneralRegister(reg)); + assert((-32769 < off) && (off < 32768)); + break; + + default: + unreached(); + } +#endif + code_t code = emitInsCode(ins); + + code |= (hint & 0x1f)<<16; //hint + code |= (reg & 0x1f)<<21; //rs or base + code |= (off & 0xffff); //offset + + ssize_t imms[] = {hint, off}; + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +#endif +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and a constant. + */ + +void emitter::emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) +{ + code_t code = emitInsCode(ins); + + if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) { + //INS_slli_w + //INS_srli_w + //INS_srai_w + //INS_rotri_w + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x1f)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0x1f)<<10;//ui5 + } + else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) { + //INS_slli_d + //INS_srli_d + //INS_srai_d + //INS_rotri_d + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((0 <= imm) && (imm <= 0x3f)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0x3f)<<10;//ui6 + } + else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { +#ifdef DEBUG + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { + 
//case INS_addi_w: + //case INS_addi_d: + //case INS_lu52i_d: + //case INS_slti: + //case INS_ld_b: + //case INS_ld_h: + //case INS_ld_w: + //case INS_ld_d: + //case INS_ld_bu: + //case INS_ld_hu: + //case INS_ld_wu: + //case INS_st_b: + //case INS_st_h: + //case INS_st_w: + //case INS_st_d: + + assert((-2048 <= imm) && (imm <= 2047)); + } + else if (ins == INS_sltui) + { + //case INS_sltui: + assert((0 <= imm) && (imm <= 0x7ff)); + } + else + { + //case INS_andi: + //case INS_ori: + //case INS_xori: + assert((0 <= imm) && (imm <= 0xfff)); + } +#endif + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0xfff)<<10;//si12 or ui12 + } + else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) { + //INS_fld_s + //INS_fld_d + //INS_fst_s + //INS_fst_d + assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-2048 <= imm) && (imm <= 2047)); + + code |= reg1 & 0x1f; //fd + code |= reg2 << 5; //rj + code |= (imm & 0xfff)<<10;//si12 + } + else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) { + //INS_ldptr_w + //INS_ldptr_d + //INS_ll_w + //INS_ll_d + + //INS_stptr_w + //INS_stptr_d + //INS_sc_w + //INS_sc_d + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-8192 <= imm) && (imm <= 8191)); + + code |= reg1; //rd + code |= reg2 << 5; //rj + code |= (imm & 0x3fff)<<10;//si14 + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + //INS_beq + //INS_bne + //INS_blt + //INS_bltu + //INS_bge + //INS_bgeu + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(!(imm & 0x3)); + assert((-32768 <= (imm>>2)) && ((imm>>2) <= 32767)); + + code |= reg1 << 5; //rj + code |= reg2; //rd + code |= ((imm>>2) & 0xffff)<<10;//offs16 + } + else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) + { + //INS_fcmp_caf_s + //INS_fcmp_cun_s + //INS_fcmp_ceq_s + //INS_fcmp_cueq_s + //INS_fcmp_clt_s + //INS_fcmp_cult_s + //INS_fcmp_cle_s + //INS_fcmp_cule_s 
+ //INS_fcmp_cne_s + //INS_fcmp_cor_s + //INS_fcmp_cune_s + //INS_fcmp_saf_d + //INS_fcmp_sun_d + //INS_fcmp_seq_d + //INS_fcmp_sueq_d + //INS_fcmp_slt_d + //INS_fcmp_sult_d + //INS_fcmp_sle_d + //INS_fcmp_sule_d + //INS_fcmp_sne_d + //INS_fcmp_sor_d + //INS_fcmp_sune_d + //INS_fcmp_caf_d + //INS_fcmp_cun_d + //INS_fcmp_ceq_d + //INS_fcmp_cueq_d + //INS_fcmp_clt_d + //INS_fcmp_cult_d + //INS_fcmp_cle_d + //INS_fcmp_cule_d + //INS_fcmp_cne_d + //INS_fcmp_cor_d + //INS_fcmp_cune_d + //INS_fcmp_saf_s + //INS_fcmp_sun_s + //INS_fcmp_seq_s + //INS_fcmp_sueq_s + //INS_fcmp_slt_s + //INS_fcmp_sult_s + //INS_fcmp_sle_s + //INS_fcmp_sule_s + //INS_fcmp_sne_s + //INS_fcmp_sor_s + //INS_fcmp_sune_s + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f)<<5; //fj + code |= (reg2 & 0x1f)<<10; //fk + code |= imm & 0x7; //cc + } + else if (INS_addu16i_d == ins) { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0xffff)<<10;//si16 + } + else if (INS_jirl == ins) + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert((-32768 <= imm) && (imm < 32768)); + + code |= reg1; //rd + code |= reg2<<5; //rj + code |= (imm & 0xffff)<<10;//offs16 + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** +* +* Add an instruction referencing two registers and a constant. 
+* Also checks for a large immediate that needs a second instruction +* and will load it in reg1 +* +* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr +* - Requires that reg1 is a general register and not SP or ZR +* - Requires that reg1 != reg2 +*/ +void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) +{//maybe optimize. + assert(isGeneralRegister(reg1)); + assert(reg1 != reg2); + + bool immFits = true; + +#ifdef DEBUG + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + //case INS_lui: + //case INS_lbu: + //case INS_lhu: + //case INS_lwu: + //case INS_lb: + //case INS_lh: + //case INS_lw: + case INS_ld_d: + //case INS_sb: + //case INS_sh: + //case INS_sw: + //case INS_sd: + ////case INS_lwc1: + ////case INS_ldc1: + immFits = isValidSimm12(imm); + break; + + case INS_andi: + case INS_ori: + case INS_xori: + immFits = (0 <= imm) && (imm <= 0xfff); + break; + + default: + assert(!"Unsupported instruction in emitIns_R_R_Imm"); + } +#endif + + if (immFits) + { + emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // Load 'imm' into the reg1 register + // then issue: 'ins' reg1, reg2, reg1 + // + assert(!EA_IS_RELOC(attr)); + emitIns_I_la(attr, reg1, imm); + //codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); + emitIns_R_R_R(ins, attr, reg1, reg2, reg1); + } +} + +/***************************************************************************** + * + * Add an instruction referencing three registers. 
+ */ + +void emitter::emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */ +{ + code_t code = emitInsCode(ins); + + if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || ((INS_stx_b <= ins) && (ins <= INS_stle_d))) { + //case INS_add_w: + //case INS_add_d: + //case INS_sub_w: + //case INS_sub_d: + //case INS_and: + //case INS_or: + //case INS_nor: + //case INS_xor: + //case INS_andn: + //case INS_orn: + + //case INS_mul_w: + //case INS_mul_d: + //case INS_mulh_w: + //case INS_mulh_wu: + //case INS_mulh_d: + //case INS_mulh_du: + //case INS_mulw_d_w: + //case INS_mulw_d_wu: + //case INS_div_w: + //case INS_div_wu: + //case INS_div_d: + //case INS_div_du: + //case INS_mod_w: + //case INS_mod_wu: + //case INS_mod_d: + //case INS_mod_du: + + //case INS_sll_w: + //case INS_srl_w: + //case INS_sra_w: + //case INS_rotr_w: + //case INS_sll_d: + //case INS_srl_d: + //case INS_sra_d: + //case INS_rotr_d: + + //case INS_maskeqz: + //case INS_masknez: + + //case INS_slt: + //case INS_sltu: + + //case INS_ldx_b: + //case INS_ldx_h: + //case INS_ldx_w: + //case INS_ldx_d: + //case INS_ldx_bu: + //case INS_ldx_hu: + //case INS_ldx_wu: + //case INS_stx_b: + //case INS_stx_h: + //case INS_stx_w: + //case INS_stx_d: + + //case INS_ldgt_b: + //case INS_ldgt_h: + //case INS_ldgt_w: + //case INS_ldgt_d: + //case INS_ldle_b: + //case INS_ldle_h: + //case INS_ldle_w: + //case INS_ldle_d: + //case INS_stgt_b: + //case INS_stgt_h: + //case INS_stgt_w: + //case INS_stgt_d: + //case INS_stle_b: + //case INS_stle_h: + //case INS_stle_w: + //case INS_stle_d: + + //case INS_amswap_w: + //case INS_amswap_d: + //case INS_amswap_db_w: + //case INS_amswap_db_d: + //case INS_amadd_w: + //case INS_amadd_d: + //case INS_amadd_db_w: + //case INS_amadd_db_d: + //case INS_amand_w: + //case INS_amand_d: + //case INS_amand_db_w: + //case INS_amand_db_d: + //case INS_amor_w: + 
//case INS_amor_d: + //case INS_amor_db_w: + //case INS_amor_db_d: + //case INS_amxor_w: + //case INS_amxor_d: + //case INS_amxor_db_w: + //case INS_amxor_db_d: + //case INS_ammax_w: + //case INS_ammax_d: + //case INS_ammax_db_w: + //case INS_ammax_db_d: + //case INS_ammin_w: + //case INS_ammin_d: + //case INS_ammin_db_w: + //case INS_ammin_db_d: + //case INS_ammax_wu: + //case INS_ammax_du: + //case INS_ammax_db_wu: + //case INS_ammax_db_du: + //case INS_ammin_wu: + //case INS_ammin_du: + //case INS_ammin_db_wu: + //case INS_ammin_db_du: + + //case INS_crc_w_b_w: + //case INS_crc_w_h_w: + //case INS_crc_w_w_w: + //case INS_crc_w_d_w: + //case INS_crcc_w_b_w: + //case INS_crcc_w_h_w: + //case INS_crcc_w_w_w: + //case INS_crcc_w_d_w: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= (reg1 /*& 0x1f*/); //rd + code |= (reg2 /*& 0x1f*/)<<5; //rj + code |= (reg3 /*& 0x1f*/)<<10;//rk + } + else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) { + //case INS_fadd_s: + //case INS_fadd_d: + //case INS_fsub_s: + //case INS_fsub_d: + //case INS_fmul_s: + //case INS_fmul_d: + //case INS_fdiv_s: + //case INS_fdiv_d: + //case INS_fmax_s: + //case INS_fmax_d: + //case INS_fmin_s: + //case INS_fmin_d: + //case INS_fmaxa_s: + //case INS_fmaxa_d: + //case INS_fmina_s: + //case INS_fmina_d: + //case INS_fscaleb_s: + //case INS_fscaleb_d: + //case INS_fcopysign_s: + //case INS_fcopysign_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + code |= (reg3 & 0x1f)<<10;//fk + } + else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) { + //case INS_fldx_s: + //case INS_fldx_d: + //case INS_fstx_s: + //case INS_fstx_d: + + //case INS_fldgt_s: + //case INS_fldgt_d: + //case INS_fldle_s: + //case INS_fldle_d: + //case INS_fstgt_s: + //case INS_fstgt_d: + //case INS_fstle_s: + //case INS_fstle_d: + 
assert(isFloatReg(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + + code |= reg1 & 0x1f; //fd + code |= reg2 << 5; //rj + code |= reg3 << 10; //rk + } + else + { + assert(!"Unsupported instruction in emitIns_R_R_R"); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing three registers and a constant. + */ + +void emitter::emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + code_t code = emitInsCode(ins); + + if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) { + //INS_alsl_w + //INS_alsl_wu + //INS_alsl_d + //INS_bytepick_w + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 3)); + + code |= reg1; //rd + code |= reg2 << 5; //rj + code |= reg3 << 10;//rk + code |= (imm /*& 0x3*/)<<15; //sa2 + } + else if (INS_bytepick_d == ins) { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + assert(isGeneralRegisterOrR0(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= reg1; //rd + code |= reg2 << 5; //rj + code |= reg3 << 10;//rk + code |= (imm /*& 0x7*/)<<15; //sa3 + } + else if (INS_fsel == ins) + { + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert((0 <= imm) && (imm <= 7)); + + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + code |= (reg3 & 0x1f)<<10; //fk + code |= (imm /*& 0x7*/)<<15; //ca + } + else + { + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + 
id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +#if 1 +/***************************************************************************** + * + * Add an instruction referencing three registers, with an extend option + */ + +void emitter::emitIns_R_R_R_Ext(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts opt, /* = INS_OPTS_NONE */ + int shiftAmount) /* = -1 -- unset */ +{ +assert(!"unimplemented on LOONGARCH yet"); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers and two constants. + */ + +void emitter::emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) +{ + code_t code = emitInsCode(ins); + + assert(isGeneralRegisterOrR0(reg1)); + assert(isGeneralRegisterOrR0(reg2)); + switch (ins) + { + case INS_bstrins_w: + case INS_bstrpick_w: + code |= (reg1 /*& 0x1f*/); //rd + code |= (reg2 /*& 0x1f*/)<<5; //rj + assert((0<=imm2) && (imm2<=imm1) && (imm1<32)); + code |= (imm1 & 0x1f)<<16; //msbw + code |= (imm2 & 0x1f)<<10; //lsbw + break; + case INS_bstrins_d: + case INS_bstrpick_d: + code |= (reg1 /*& 0x1f*/); //rd + code |= (reg2 /*& 0x1f*/)<<5; //rj + assert((0<=imm2) && (imm2<=imm1) && (imm1<64)); + code |= (imm1 & 0x3f)<<16; //msbd + code |= (imm2 & 0x3f)<<10; //lsbd + break; + default: + unreached(); + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing four registers. 
+ */ + +void emitter::emitIns_R_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4) +{ + code_t code = emitInsCode(ins); + +//#ifdef DEBUG + switch (ins) + { + case INS_fmadd_s: + case INS_fmadd_d: + case INS_fmsub_s: + case INS_fmsub_d: + case INS_fnmadd_s: + case INS_fnmadd_d: + case INS_fnmsub_s: + case INS_fnmsub_d: + assert(isFloatReg(reg1)); + assert(isFloatReg(reg2)); + assert(isFloatReg(reg3)); + assert(isFloatReg(reg4)); + + code |= (reg1 & 0x1f); //fd + code |= (reg2 & 0x1f)<<5; //fj + code |= (reg3 & 0x1f)<<10; //fk + code |= (reg4 & 0x1f)<<15; //fa + break; + default: + unreached(); + } +//#endif + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idReg1(reg1); + id->idAddr()->iiaSetInstrEncode(code); + + id->idCodeSize(4); + //dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a static data member operand. If 'size' is 0, the + * instruction operates on the address of the static member instead of its + * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). + */ + +void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_C"); +#endif +} + +/***************************************************************************** + * + * Add an instruction referencing stack-based local variable. + */ + +void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_S"); +#endif +} + +#if 0 +/***************************************************************************** + * + * Add an instruction referencing a register and a stack-based local variable. 
+ */ + +void emitter::emitIns_R_R_S( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int sa) +{ + assert(!"unimplemented on LOONGARCH yet"); +#if 1 + regNumber regs[] = {reg1, reg2}; + ssize_t imm = (ssize_t)sa; + emitAllocInstrOnly(emitInsOps(ins, regs, &imm), attr); +#else + instrDesc* id = emitNewInstrCns(attr, sa); + insFormat fmt = IF_FMT_FUNC; + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg1); + id->idReg2(reg2); + + //dispIns(id); + appendToCurIG(id); +#endif +} +#endif + +/***************************************************************************** + * + * Add an instruction referencing two register and consectutive stack-based local variable slots. + */ +void emitter::emitIns_R_R_S_S( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +} + +/***************************************************************************** + * + * Add an instruction referencing consecutive stack-based local variable slots and two registers + */ +void emitter::emitIns_S_S_R_R( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +} + +/***************************************************************************** + * + * Add an instruction referencing stack-based local variable and an immediate + */ +void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_S_I"); +#endif +} + +/***************************************************************************** + * + * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * For LOONGARCH64, maybe not the best, here just suports the func-interface. 
+ * + */ +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + assert(offs >= 0); + assert(instrDesc::fitsInSmallCns(offs));//can optimize. + //assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). + //assert(isGeneralRegister(reg)); while load float the reg is FPR. + + //when id->idIns == bl, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + //when id->idIns == load-ins, for reloc! 4-ins. + // pcaddu12i reg, off-hi-20bits + // load reg, offs_lo-12bits(reg) #when ins is load ins. + // + // INS_OPTS_RC: ins == bl placeholders. 3-ins: ////TODO: maybe optimize. + // lu12i_w reg, addr-hi-20bits + // ori reg, reg, addr-lo-12bits + // lu32i_d reg, addr_hi-32bits + // + // INS_OPTS_RC: ins == load. 3-ins: + // lu12i_w at, offs_hi-20bits //NOTE: offs = (int)(offs_hi<<12) + (int)offs_lo + // lu32i_d at, 0xff addr_hi-32bits + // load reg, addr_lo-12bits(reg) #when ins is load ins. + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); //for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. + + id->idSmallCns(offs); //usually is 0. + id->idInsOpt(INS_OPTS_RC); + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + id->idCodeSize(8); + } else + id->idCodeSize(12);//TODO: maybe optimize. + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + //TODO: this maybe deleted. + id->idSetIsBound(); // We won't patch address since we will know the exact distance + // once JIT code and data are allocated together. + + assert(addrReg == REG_NA);//NOTE: for LOONGARCH64, not support addrReg != REG_NA. 
+ + id->idAddr()->iiaFieldHnd = fldHnd; + + //dispIns(id);//loongarch dumping instr by other-fun. + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a static member + constant. + */ + +void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_C_I"); +#endif +} + +/***************************************************************************** + * + * Add an instruction with a static member + register operands. + */ + +void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + assert(!"emitIns_C_R not supported for RyuJIT backend"); +#endif +} + +void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_AR"); +#endif +} + +// This computes address from the immediate which is relocatable. +void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(EA_IS_RELOC(attr));//EA_PTR_DSP_RELOC + assert(ins == INS_bl);//for special. + assert(isGeneralRegister(reg)); + + // INS_OPTS_RELOC: placeholders. 2-ins: + // case:EA_HANDLE_CNS_RELOC + // pcaddu12i reg, off-hi-20bits + // addi_d reg, reg, off-lo-12bits + // case:EA_PTR_DSP_RELOC + // pcaddu12i reg, off-hi-20bits + // ldptr_d reg, reg, off-lo-12bits + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + assert(reg != REG_R0); //for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. 
+ + id->idInsOpt(INS_OPTS_RELOC); + + if (EA_IS_GCREF(attr)) + { + /* A special value indicates a GCref pointer value */ + id->idGCref(GCT_GCREF); + id->idOpSize(EA_PTRSIZE); + } + else if (EA_IS_BYREF(attr)) + { + /* A special value indicates a Byref pointer value */ + id->idGCref(GCT_BYREF); + id->idOpSize(EA_PTRSIZE); + } + + id->idAddr()->iiaAddr = (BYTE*)addr; + + id->idCodeSize(8); + //dispIns(id);//loongarch dumping instr by other-fun. + appendToCurIG(id); +} + +void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_AR_R"); +#endif +} + +void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_ARR"); +#endif +} + +void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_ARR"); +#endif +} + +void emitter::emitIns_R_ARX( + instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NYI("emitIns_R_ARR"); +#endif +} + +/***************************************************************************** + * + * Add a data label instruction. + */ +void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg) +{ + NYI("emitIns_R_D"); +} + +void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm) +{ + assert(!"unimplemented on LOONGARCH yet"); +} +#endif + +/***************************************************************************** + * + * Record that a jump instruction uses the short encoding + * + */ +void emitter::emitSetShortJump(instrDescJmp* id) +{ +/* TODO: maybe delete it on future. 
+*/
+    return;
+}
+
+/*****************************************************************************
+ *
+ *  Add a label instruction.
+ */
+
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+    assert(dst->bbFlags & BBF_HAS_LABEL);
+
+    // if for reloc!  4-ins:
+    //   pcaddu12i  reg, offset-hi20
+    //   addi_d  reg, reg, offset-lo12
+    //
+    // else:  3-ins:
+    //   lu12i_w  reg, dst-hi-20bits
+    //   ori  reg, reg, dst-lo-12bits
+    //   bstrins_d  reg, zero, msbd, lsbd / lu32i_d  reg, 0xff
+
+    instrDesc* id = emitNewInstr(attr);
+
+    id->idIns(ins);
+    id->idInsOpt(INS_OPTS_RL);
+    id->idAddr()->iiaBBlabel = dst;
+
+    if (emitComp->opts.compReloc)
+    {
+        id->idSetIsDspReloc();
+        id->idCodeSize(8);
+    }
+    else
+        id->idCodeSize(12);
+
+    id->idReg1(reg);
+
+    if (EA_IS_GCREF(attr))
+    {
+        /* A special value indicates a GCref pointer value */
+        id->idGCref(GCT_GCREF);
+        id->idOpSize(EA_PTRSIZE);
+    }
+    else if (EA_IS_BYREF(attr))
+    {
+        /* A special value indicates a Byref pointer value */
+        id->idGCref(GCT_BYREF);
+        id->idOpSize(EA_PTRSIZE);
+    }
+
+#ifdef DEBUG
+    // Mark the catch return
+    if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+    {
+        id->idDebugOnlyInfo()->idCatchRet = true;
+    }
+#endif // DEBUG
+
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+    assert(!"unimplemented on LOONGARCH yet: emitIns_J_R."); // not used.
+}
+
+// NOTE:
+//   For loongarch64, emitIns_J is just only jump, not include the condition branch!
+//   The condition branch is the emitIns_J_cond_la().
+//   If using "BasicBlock* dst" label as target, the INS_OPTS_J is a short jump
+//   while long jump will be replaced by INS_OPTS_JIRL.
+//
+//   The arg "instrCount" is two regs's encoding when ins is
+//   beq/bne/blt/bltu/bge/bgeu/beqz/bnez.
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
+{
+    if (dst == nullptr)
+    { // Now this case not used for loongarch64.
+        assert(instrCount != 0);
+        assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now.
+
+#if 1
+        assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000.
+        emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions.
+#else
+        instrCount = instrCount << 2;
+        if ((-33554432 <= instrCount) && (instrCount < 33554432))
+        {
+            /* This jump is really short */
+            emitIns_I(ins, EA_PTRSIZE, instrCount);
+        }
+        else
+        {
+            // NOTE: should not be here !!!
+            assert(!"should not be here on LOONGARCH64 !!!");
+
+            //emitIns_I(INS_bl, EA_PTRSIZE, 4);
+
+            //ssize_t imm = ((ssize_t)instrCount>>12);
+            //assert(isValidSimm12(imm));
+            //emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm);
+            //imm = (instrCount & 0xfffff);
+            //emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm);
+
+            //emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA);
+            //emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0);
+        }
+#endif
+        return;
+    }
+
+    // (dst != nullptr)
+    //
+    // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JIRL.
+    //   bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl  dst
+
+    assert(dst->bbFlags & BBF_HAS_LABEL);
+
+    instrDescJmp* id = emitNewInstrJmp();
+    assert((INS_bceqz <= ins) && (ins <= INS_bl));
+    id->idIns(ins);
+    // The two source registers of a compare-and-branch are packed into
+    // 'instrCount': bits [4:0] hold reg1 and bits [9:5] hold reg2.
+    id->idReg1((regNumber)(instrCount & 0x1f));
+    id->idReg2((regNumber)((instrCount >> 5) & 0x1f));
+
+    id->idInsOpt(INS_OPTS_J);
+    emitCounts_INS_OPTS_J++;
+    id->idAddr()->iiaBBlabel = dst;
+
+    if (emitComp->opts.compReloc)
+    {
+        id->idSetIsDspReloc();
+    }
+
+    id->idjShort = false;
+
+    //// TODO: maybe deleted this for loongarch64.
+    id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+#ifdef DEBUG
+    if (emitComp->opts.compLongAddress) // Force long branches
+        id->idjKeepLong = 1;
+#endif // DEBUG
+
+    /* Record the jump's IG and offset within it */
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
+
+    /* Append this jump to this IG's jump list */
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
+
+    id->idCodeSize(4);
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+// NOTE:
+//   For loongarch64, emitIns_J_cond_la() is the condition branch.
+//   NOTE: Only supported short branch so far !!!
+//
+void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2)
+{
+    // TODO:
+    //   Now the emitIns_J_cond_la() is only the short condition branch.
+    //   There is no long condition branch for loongarch64 so far.
+    //   For loongarch64, the long condition branch is like this:
+    //     ---> branch_condition  condition_target;  // here is the condition branch, short branch is enough.
+    //     ---> jump jump_target; (this supporting the long jump.)
+    //   condition_target:
+    //     ...
+    //     ...
+    //   jump_target:
+    //
+    //
+    //  INS_OPTS_J_cond: placeholders. 1-ins.
+    //    ins  reg1, reg2, dst
+
+    assert(dst != nullptr);
+    assert(dst->bbFlags & BBF_HAS_LABEL);
+
+    instrDescJmp* id = emitNewInstrJmp();
+
+    id->idIns(ins);
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idjShort = false;
+
+    id->idInsOpt(INS_OPTS_J_cond);
+    id->idAddr()->iiaBBlabel = dst;
+
+    id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+#ifdef DEBUG
+    if (emitComp->opts.compLongAddress) // Force long branches
+        id->idjKeepLong = 1;
+#endif // DEBUG
+
+    /* Record the jump's IG and offset within it */
+    id->idjIG   = emitCurIG;
+    id->idjOffs = emitCurIGsize;
+
+    /* Append this jump to this IG's jump list */
+    id->idjNext      = emitCurIGjmpList;
+    emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+    emitTotalIGjmps++;
+#endif
+
+    id->idCodeSize(4);
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+// Materialize the 64-bit immediate 'imm' into general register 'reg',
+// choosing a 1- to 4-instruction sequence (addi_w / ori / lu12i_w+ori /
+// +lu32i_d / +lu52i_d) based on the significant bits of the immediate.
+// Multi-instruction sequences are recorded as a single INS_OPTS_I
+// descriptor and expanded in emitOutputInstr.
+void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm)
+{
+    assert(!EA_IS_RELOC(size));
+    assert(isGeneralRegister(reg));
+    //size = EA_SIZE(size);
+
+    // Fits a signed 12-bit immediate: one addi_w suffices.
+    if (-1 == (imm >> 11) || 0 == (imm >> 11))
+    {
+        emitIns_R_R_I(INS_addi_w, size, reg, REG_R0, imm);
+        return;
+    }
+
+    // Fits an unsigned 12-bit immediate: one ori suffices.
+    if (0 == (imm >> 12))
+    {
+        emitIns_R_R_I(INS_ori, size, reg, REG_R0, imm);
+        return;
+    }
+
+    instrDesc* id = emitNewInstr(size);
+
+    if ((imm == INT64_MAX) || (imm == 0xffffffff))
+    {
+        //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1);
+        //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6);
+        id->idReg2((regNumber)1); // special for INT64_MAX(ui6=1) or UINT32_MAX(ui6=32);
+        id->idCodeSize(8);
+    }
+    else if (-1 == (imm >> 31) || 0 == (imm >> 31))
+    {
+        //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12));
+        //emitIns_R_R_I(INS_ori, size, reg, reg, imm);
+
+        id->idCodeSize(8);
+    }
+    else if (-1 == (imm >> 51) || 0 == (imm >> 51))
+    {
+        // low-32bits.
+        //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12);
+        //emitIns_R_R_I(INS_ori, size, reg, reg, imm);
+        //
+        // high-20bits.
+        //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32));
+
+        id->idCodeSize(12);
+    }
+    else
+    { // 0xffff ffff ffff ffff.
+        // low-32bits.
+        //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12));
+        //emitIns_R_R_I(INS_ori, size, reg, reg, imm);
+        //
+        // high-32bits.
+        //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32));
+        //emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52));
+
+        id->idCodeSize(16);
+    }
+
+    id->idIns(INS_lu12i_w);
+    id->idReg1(reg); // destination register that will get the constant value.
+    assert(reg != REG_R0);
+
+    id->idInsOpt(INS_OPTS_I);
+
+    id->idAddr()->iiaAddr = (BYTE*)imm;
+
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add a call instruction (direct or indirect).
+ *      argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN       : addr is the method address
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R          : "call ireg".
+ *
+ * For LOONGARCH xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ *  Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+void emitter::emitIns_Call(EmitCallType          callType,
+                           CORINFO_METHOD_HANDLE methHnd,
+                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+                           void*            addr,
+                           ssize_t          argSize,
+                           emitAttr         retSize
+                               MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+                           VARSET_VALARG_TP ptrVars,
+                           regMaskTP        gcrefRegs,
+                           regMaskTP        byrefRegs,
+                           const DebugInfo& di /* = DebugInfo() */,
+                           regNumber        ireg /* = REG_NA */,
+                           regNumber        xreg /* = REG_NA */,
+                           unsigned         xmul /* = 0 */,
+                           ssize_t          disp /* = 0 */,
+                           bool             isJump /* = false */)
+{
+    /* Sanity check the arguments depending on callType */
+
+    assert(callType < EC_COUNT);
+    assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+    assert(callType < EC_INDIR_R || addr == NULL);
+    assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+
+    // ARM never uses these
+    assert(xreg == REG_NA && xmul == 0 && disp == 0);
+
+    // Our stack level should be always greater than the bytes of arguments we push. Just
+    // a sanity test.
+    assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+
+    // Trim out any callee-trashed registers from the live set.
+    regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd);
+    gcrefRegs &= savedSet;
+    byrefRegs &= savedSet;
+
+#ifdef DEBUG
+    if (EMIT_GC_VERBOSE)
+    {
+        printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+        dumpConvertedVarSet(emitComp, ptrVars);
+        printf(", gcrefRegs=");
+        printRegMaskInt(gcrefRegs);
+        emitDispRegSet(gcrefRegs);
+        printf(", byrefRegs=");
+        printRegMaskInt(byrefRegs);
+        emitDispRegSet(byrefRegs);
+        printf("\n");
+    }
+#endif
+
+    /* Managed RetVal: emit sequence point for the call */
+    if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid())
+    {
+        codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false);
+    }
+
+    /*
+        We need to allocate the appropriate instruction descriptor based
+        on whether this is a direct/indirect call, and whether we need to
+        record an updated set of live GC variables.
+     */
+    instrDesc* id;
+
+    assert(argSize % REGSIZE_BYTES == 0);
+    int argCnt = (int)(argSize / (int)REGSIZE_BYTES);
+
+    if (callType >= EC_INDIR_R)
+    {
+        /* Indirect call, virtual calls */
+
+        assert(callType == EC_INDIR_R);
+
+        id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+    }
+    else
+    {
+        /* Helper/static/nonvirtual/function calls (direct or through handle),
+           and calls to an absolute addr. */
+
+        assert(callType == EC_FUNC_TOKEN);
+
+        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+    }
+
+    /* Update the emitter's live GC ref sets */
+
+    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+    emitThisGCrefRegs = gcrefRegs;
+    emitThisByrefRegs = byrefRegs;
+
+    id->idSetIsNoGC(emitNoGChelper(methHnd));
+
+    /* Set the instruction - special case jumping a function */
+    instruction ins;
+
+    ins = INS_jirl; // jirl t2
+    id->idIns(ins);
+
+    id->idInsOpt(INS_OPTS_C);
+    // TODO: maybe optimize.
+
+    // INS_OPTS_C: placeholders.  1/2/4-ins:
+    //   if (callType == EC_INDIR_R)
+    //     jirl REG_R0/REG_RA, ireg, 0   <---- 1-ins
+    //   else if (callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR)
+    //     if reloc:
+    //       //pc + offset_38bits       # only when reloc.
+    //       pcaddu18i  t2, addr-hi20
+    //       jilr  r0/1,t2,addr-lo18
+    //
+    //     else:
+    //       lu12i_w  t2, dst_offset_lo32-hi
+    //       ori  t2, t2, dst_offset_lo32-lo
+    //       lu32i_d  t2, dst_offset_hi32-lo
+    //       jirl  REG_R0/REG_RA, t2, 0
+
+    /* Record the address: method, indirection, or funcptr */
+    if (callType == EC_INDIR_R)
+    {
+        /* This is an indirect call (either a virtual call or func ptr call) */
+        //assert(callType == EC_INDIR_R);
+
+        id->idSetIsCallRegPtr();
+
+        regNumber reg_jirl = isJump ? REG_R0 : REG_RA;
+        id->idReg4(reg_jirl);
+        id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3.
+        assert(xreg == REG_NA);
+
+        id->idCodeSize(4);
+    }
+    else
+    {
+        /* This is a simple direct call: "call helper/method/addr" */
+
+        assert(callType == EC_FUNC_TOKEN);
+        assert(addr != NULL);
+        // NOTE(review): 'long' is only 32-bit on LLP64 (Windows) hosts;
+        // presumably only LP64 hosts are supported here — confirm.
+        assert(((long)addr & 3) == 0);
+
+        // NOTE: low-bit0 is used for jirl ra/r0,rd,0 — it tells emitOutputCall
+        // whether the call links in RA (isJump==false) or not.
+        addr = (void*)((long)addr + (isJump ? 0 : 1));
+        id->idAddr()->iiaAddr = (BYTE*)addr;
+
+        if (emitComp->opts.compReloc)
+        {
+            id->idSetIsDspReloc();
+            id->idCodeSize(8);
+        }
+        else
+        {
+            id->idCodeSize(16);
+        }
+    }
+
+#ifdef DEBUG
+    if (EMIT_GC_VERBOSE)
+    {
+        if (id->idIsLargeCall())
+        {
+            printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+                   VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+        }
+    }
+
+    id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+    id->idDebugOnlyInfo()->idCallSig   = sigInfo;
+#endif // DEBUG
+
+#ifdef LATE_DISASM
+    if (addr != nullptr)
+    {
+        codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+    }
+#endif // LATE_DISASM
+
+    //dispIns(id);
+    appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Output a call instruction.
+ */
+
+unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+{
+    unsigned char callInstrSize = sizeof(code_t); // 4 bytes
+    regMaskTP     gcrefRegs;
+    regMaskTP     byrefRegs;
+
+    VARSET_TP GCvars(VarSetOps::UninitVal());
+
+    // Is this a "fat" call descriptor?
+    if (id->idIsLargeCall())
+    {
+        instrDescCGCA* idCall = (instrDescCGCA*)id;
+        gcrefRegs             = idCall->idcGcrefRegs;
+        byrefRegs             = idCall->idcByrefRegs;
+        VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+    }
+    else
+    {
+        assert(!id->idIsLargeDsp());
+        assert(!id->idIsLargeCns());
+
+        gcrefRegs = emitDecodeCallGCregs(id);
+        byrefRegs = 0;
+        VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+    }
+
+    /* We update the GC info before the call as the variables cannot be
+       used by the call. Killing variables before the call helps with
+       boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+       If we ever track aliased variables (which could be used by the
+       call), we would have to keep them alive past the call. */
+
+    emitUpdateLiveGCvars(GCvars, dst);
+#ifdef DEBUG
+    // NOTEADD:
+    // Output any delta in GC variable info, corresponding to the before-call GC var updates done above.
+    if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC)
+    {
+        emitDispGCVarDelta(); // defined in emit.cpp
+    }
+#endif // DEBUG
+
+    assert(id->idIns() == INS_jirl);
+    if (id->idIsCallRegPtr())
+    { // EC_INDIR_R
+        code = emitInsCode(id->idIns());
+        D_INST_JIRL(code, id->idReg4(), id->idReg3(), 0);
+    }
+    else if (id->idIsReloc())
+    {
+        // pc + offset_38bits
+        //
+        //   pcaddu18i  t2, addr-hi20
+        //   jilr  r0/1,t2,addr-lo18
+
+        long addr = (long)id->idAddr()->iiaAddr; // get addr.
+        // should assert(addr-dst < 38bits);
+
+        // Low bit of the recorded address selects the jirl link register
+        // (set in emitIns_Call); strip it before recording the relocation.
+        int reg2 = (int)addr & 1;
+        addr     = addr ^ 1;
+
+        emitRecordRelocation(dst, (BYTE*)addr, IMAGE_REL_LOONGARCH64_PC);
+
+        *(code_t*)dst = 0x1e00000e; // pcaddu18i t2, 0 (reloc fills the offset)
+        dst += 4;
+#ifdef DEBUG
+        code = emitInsCode(INS_pcaddu18i);
+        assert((code | (14)) == 0x1e00000e);
+        assert((int)REG_T2 == 14);
+        code = emitInsCode(INS_jirl);
+        assert(code == 0x4c000000);
+#endif
+        // NOTE(review): this jirl is stored directly, but 'dst' is not advanced
+        // here and 'code' is not updated to this encoding, so the
+        // emitOutput_Instr(dst, code) call below appears to overwrite this slot
+        // with a stale 'code' value — verify against the upstream sources.
+        *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2;
+    }
+    else
+    {
+        //   lu12i_w  t2, dst_offset_lo32-hi  // TODO: maybe optimize.
+        //   ori  t2, t2, dst_offset_lo32-lo
+        //   lu32i_d  t2, dst_offset_hi32-lo
+        //   jirl  t2
+
+        ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr);
+        //assert((imm >> 32) <= 0x7ffff); // In fact max is <= 0xffff.
+        assert((imm >> 32) == 0xff); // for LA64 addr-is 0xff. but this is not the best !!!
+
+        int reg2 = (int)(imm & 1);
+        imm -= reg2;
+
+        code = emitInsCode(INS_lu12i_w);
+        D_INST_lu12i_w(code, REG_T2, imm >> 12);
+        *(code_t*)dst = code;
+        dst += 4;
+
+        code = emitInsCode(INS_ori);
+        D_INST_ori(code, REG_T2, REG_T2, imm);
+        *(code_t*)dst = code;
+        dst += 4;
+
+        //emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32);
+        code = emitInsCode(INS_lu32i_d);
+        //D_INST_lu32i_d(code, REG_T2, imm >> 32);
+        D_INST_lu32i_d(code, REG_T2, 0xff);
+        *(code_t*)dst = code;
+        dst += 4;
+
+        code = emitInsCode(INS_jirl);
+        D_INST_JIRL(code, reg2, REG_T2, 0);
+    }
+
+    // Now output the call instruction and update the 'dst' pointer
+    //
+    unsigned outputInstrSize = emitOutput_Instr(dst, code);
+    dst += outputInstrSize;
+
+    // update volatile regs within emitThisGCrefRegs and emitThisByrefRegs.
+    if (gcrefRegs != emitThisGCrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+    }
+    if (byrefRegs != emitThisByrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+    }
+
+    // All call instructions are 4-byte in size on LOONGARCH64
+    // not including delay-slot which processed later.
+    assert(outputInstrSize == callInstrSize);
+
+    // If the method returns a GC ref, mark INTRET (A0) appropriately.
+    if (id->idGCref() == GCT_GCREF)
+    {
+        gcrefRegs = emitThisGCrefRegs | RBM_INTRET;
+    }
+    else if (id->idGCref() == GCT_BYREF)
+    {
+        byrefRegs = emitThisByrefRegs | RBM_INTRET;
+    }
+
+    // If is a multi-register return method is called, mark INTRET_1 (A1) appropriately
+    if (id->idIsLargeCall())
+    {
+        instrDescCGCA* idCall = (instrDescCGCA*)id;
+        if (idCall->idSecondGCref() == GCT_GCREF)
+        {
+            gcrefRegs |= RBM_INTRET_1;
+        }
+        else if (idCall->idSecondGCref() == GCT_BYREF)
+        {
+            byrefRegs |= RBM_INTRET_1;
+        }
+    }
+
+    // If the GC register set has changed, report the new set.
+    if (gcrefRegs != emitThisGCrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+    }
+    // If the Byref register set has changed, report the new set.
+    if (byrefRegs != emitThisByrefRegs)
+    {
+        emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+    }
+
+    // Some helper calls may be marked as not requiring GC info to be recorded.
+    if (!id->idIsNoGC())
+    {
+        // On LOONGARCH64, as on AMD64, we don't change the stack pointer to push/pop args.
+        // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+        // to record the call for GC info purposes.  (It might be best to use an alternate call,
+        // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+        emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+
+        // Do we need to record a call location for GC purposes?
+        //
+        if (!emitFullGCinfo)
+        {
+            emitRecordGCcall(dst, callInstrSize);
+        }
+    }
+    // Report the total byte length of the emitted call sequence back to the
+    // caller (1, 2, or 4 instructions depending on the call form).
+    if (id->idIsCallRegPtr())
+    {
+        callInstrSize = 1 << 2;
+    }
+    else
+    {
+        callInstrSize = id->idIsReloc() ? (2 << 2) : (4 << 2); // INS_OPTS_C: 2/4-ins.
+    }
+
+    return callInstrSize;
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit LOONGARCH64 instruction
+ */
+
+/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
+{
+    assert(sizeof(code_t) == 4);
+    // Store through the writeable alias of 'dst' (see writeableOffset).
+    BYTE* dstRW       = dst + writeableOffset;
+    *((code_t*)dstRW) = code;
+
+    return sizeof(code_t);
+}
+
+/*****************************************************************************
+ *
+ *  Append the machine code corresponding to the given instruction descriptor
+ *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ *  is the instruction group that contains the instruction. Updates '*dp' to
+ *  point past the generated code, and returns the size of the instruction
+ *  descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+    BYTE*       dst  = *dp;
+    BYTE*       dst2 = dst; // addr for updating gc info if needed.
+    code_t      code = 0;
+    instruction ins;
+    size_t      sz; // = emitSizeOfInsDsc(id);
+
+#ifdef DEBUG
+#if DUMP_GC_TABLES
+    bool dspOffs = emitComp->opts.dspGCtbls;
+#else
+    bool dspOffs = !emitComp->opts.disDiffable;
+#endif
+#endif // DEBUG
+
+    assert(REG_NA == (int)REG_NA);
+
+    insOpts insOp = id->idInsOpt();
+
+    switch (insOp)
+    {
+        case INS_OPTS_RELOC:
+        {
+            // case:EA_HANDLE_CNS_RELOC
+            //   pcaddu12i  reg, off-hi-20bits
+            //   addi_d  reg, reg, off-lo-12bits
+            // case:EA_PTR_DSP_RELOC
+            //   pcaddu12i  reg, off-hi-20bits
+            //   ldptr_d  reg, reg, off-lo-12bits
+
+            regNumber reg1 = id->idReg1();
+
+            emitRecordRelocation(dst, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC);
+
+            *(code_t*)dst = 0x1c000000 | (code_t)reg1; // pcaddu12i reg1, 0
+            dst += 4;
+            dst2 = dst;
+
+#ifdef DEBUG
+            code = emitInsCode(INS_pcaddu12i);
+            assert(code == 0x1c000000);
+            code = emitInsCode(INS_addi_d);
+            assert(code == 0x02c00000);
+            code = emitInsCode(INS_ldptr_d);
+            assert(code == 0x26000000);
+#endif
+
+            if (id->idIsCnsReloc())
+            {
+                ins           = INS_addi_d;
+                *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5);
+            }
+            else // if (id->idIsDspReloc())
+            {
+                assert(id->idIsDspReloc());
+                ins           = INS_ldptr_d;
+                *(code_t*)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1 << 5);
+            }
+
+            if (id->idGCref() != GCT_NONE)
+            {
+                emitGCregLiveUpd(id->idGCref(), reg1, dst);
+            }
+            else
+            {
+                emitGCregDeadUpd(reg1, dst);
+            }
+
+            dst += 4;
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+        case INS_OPTS_I:
+        {
+            // Expansion of emitIns_I_la: the descriptor's code size selects
+            // the 2-, 3- or 4-instruction constant-materialization sequence.
+            ssize_t   imm  = (ssize_t)(id->idAddr()->iiaAddr);
+            regNumber reg1 = id->idReg1();
+            dst2 += 4; // assert(dst2 == dst);
+
+            switch (id->idCodeSize())
+            {
+                case 8: // if (id->idCodeSize() == 8)
+                {
+                    if (id->idReg2())
+                    { // special for INT64_MAX or UINT32_MAX;
+                        code = emitInsCode(INS_addi_d);
+                        //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1);
+                        D_INST_2RI12(code, reg1, REG_R0, -1);
+                        *(code_t*)dst = code;
+                        dst += 4;
+
+                        ssize_t ui6 = (imm == INT64_MAX) ? 1 : 32;
+                        code        = emitInsCode(INS_srli_d);
+                        //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6);
+                        code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10));
+                        *(code_t*)dst = code;
+                    }
+                    else
+                    {
+                        code = emitInsCode(INS_lu12i_w);
+                        D_INST_lu12i_w(code, reg1, imm >> 12);
+                        *(code_t*)dst = code;
+                        dst += 4;
+
+                        code = emitInsCode(INS_ori);
+                        D_INST_ori(code, reg1, reg1, imm);
+                        *(code_t*)dst = code;
+                    }
+                    break;
+                }
+                case 12: // else if (id->idCodeSize() == 12)
+                {
+                    code = emitInsCode(INS_lu12i_w);
+                    D_INST_lu12i_w(code, reg1, imm >> 12);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_ori);
+                    D_INST_ori(code, reg1, reg1, imm);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_lu32i_d);
+                    //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32));
+                    D_INST_lu32i_d(code, reg1, imm >> 32);
+                    *(code_t*)dst = code;
+
+                    break;
+                }
+                case 16: // else if (id->idCodeSize() == 16)
+                {
+                    code = emitInsCode(INS_lu12i_w);
+                    D_INST_lu12i_w(code, reg1, imm >> 12);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_ori);
+                    D_INST_ori(code, reg1, reg1, imm);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_lu32i_d);
+                    D_INST_lu32i_d(code, reg1, imm >> 32);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_lu52i_d);
+                    D_INST_lu52i_d(code, reg1, reg1, imm >> 52);
+                    *(code_t*)dst = code;
+
+                    break;
+                }
+                default:
+                    unreached();
+                    break;
+            }
+
+            ins = INS_ori;
+            dst += 4;
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+        case INS_OPTS_RC:
+        {
+            // Reference to JIT data
+
+            // when id->idIns == bl, for reloc!
+            //   pcaddu12i  r21, off-hi-20bits
+            //   addi_d  reg, r21, off-lo-12bits
+            // when id->idIns == load-ins
+            //   pcaddu12i  r21, off-hi-20bits
+            //   load  reg, offs_lo-12bits(r21)  # when ins is load ins.
+            //
+            // when id->idIns == bl
+            //   lu12i_w  r21, addr-hi-20bits
+            //   ori  reg, r21, addr-lo-12bits
+            //   lu32i_d  reg, addr_hi-32bits
+            //
+            // when id->idIns == load-ins
+            //   lu12i_w  r21, offs_hi-20bits
+            //   lu32i_d  r21, 0xff  addr_hi-32bits
+            //   load  reg, addr_lo-12bits(r21)
+            assert(id->idAddr()->iiaIsJitDataOffset());
+            assert(id->idGCref() == GCT_NONE);
+
+            int doff = id->idAddr()->iiaGetJitDataOffset();
+            assert(doff >= 0);
+
+            ssize_t imm = emitGetInsSC(id);
+            assert((imm >= 0) && (imm < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0.
+
+            unsigned dataOffs = (unsigned)(doff + imm);
+
+            assert(dataOffs < emitDataSize());
+
+            ins            = id->idIns();
+            regNumber reg1 = id->idReg1();
+
+            if (id->idIsReloc())
+            {
+                // get the addr-offset of the data.
+                imm = (ssize_t)emitConsBlock - (ssize_t)dst + dataOffs;
+                assert(imm > 0);
+                assert(!(imm & 3));
+
+                // Round the hi-20 part up when bit 11 is set so the low 12
+                // bits form a signed displacement from the pcaddu12i result.
+                doff = (int)(imm & 0x800);
+                imm += doff;
+                assert(isValidSimm20(imm >> 12));
+
+                doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit.
+
+#ifdef DEBUG
+                code = emitInsCode(INS_pcaddu12i);
+                assert(code == 0x1c000000);
+#endif
+                code          = 0x1c000000 | 21; // pcaddu12i r21, ...
+                *(code_t*)dst = code | (((code_t)imm & 0xfffff000) >> 7);
+                dst += 4;
+
+                if (ins == INS_bl)
+                {
+                    assert(isGeneralRegister(reg1));
+                    ins = INS_addi_d;
+#ifdef DEBUG
+                    code = emitInsCode(INS_addi_d);
+                    assert(code == 0x02c00000);
+#endif
+                    code          = 0x02c00000 | (21 << 5);
+                    *(code_t*)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10);
+                }
+                else
+                {
+                    code = emitInsCode(ins);
+                    D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); // NOTE: here must be REG_R21 !!!
+                    *(code_t*)dst = code;
+                }
+                dst += 4;
+                dst2 = dst;
+            }
+            else
+            {
+                // get the addr of the data.
+                imm = (ssize_t)emitConsBlock + dataOffs;
+
+                code = emitInsCode(INS_lu12i_w);
+                if (ins == INS_bl)
+                {
+                    assert((imm >> 32) == 0xff);
+                    //assert((imm >> 32) <= 0x7ffff);
+
+                    doff = (int)imm >> 12;
+                    D_INST_lu12i_w(code, REG_R21, doff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(INS_ori);
+                    D_INST_ori(code, reg1, REG_R21, imm);
+                    *(code_t*)dst = code;
+                    dst += 4;
+                    dst2 = dst;
+
+                    ins  = INS_lu32i_d;
+                    code = emitInsCode(INS_lu32i_d);
+                    //D_INST_lu32i_d(code, reg1, imm >> 32);
+                    D_INST_lu32i_d(code, reg1, 0xff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+                }
+                else
+                {
+                    doff = (int)(imm & 0x800);
+                    imm += doff;
+                    doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit.
+
+                    assert((imm >> 32) == 0xff);
+                    //assert((imm >> 32) <= 0x7ffff);
+
+                    dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits.
+                    D_INST_lu12i_w(code, REG_R21, dataOffs);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    //emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32);
+                    code = emitInsCode(INS_lu32i_d);
+                    //D_INST_lu32i_d(code, REG_R21, imm >> 32);
+                    D_INST_lu32i_d(code, REG_R21, 0xff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+
+                    code = emitInsCode(ins);
+                    D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff);
+                    *(code_t*)dst = code;
+                    dst += 4;
+                    dst2 = dst;
+                }
+            }
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+
+        case INS_OPTS_RL:
+        {
+            // if for reloc!
+            //   pcaddu12i  reg, offset-hi20
+            //   addi_d  reg, reg, offset-lo12
+            //
+            // else: //// TODO: optimize.
+            //   lu12i_w  reg, dst-hi-12bits
+            //   ori  reg, reg, dst-lo-12bits
+            //   lu32i_d  reg, dst-hi-32bits
+
+            insGroup* tgtIG          = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel);
+            id->idAddr()->iiaIGlabel = tgtIG;
+
+            regNumber reg1 = id->idReg1();
+            assert(isGeneralRegister(reg1));
+
+            if (id->idIsReloc())
+            {
+                ssize_t imm = (ssize_t)tgtIG->igOffs;
+                imm         = (ssize_t)emitCodeBlock + imm - (ssize_t)dst;
+                assert((imm & 3) == 0);
+
+                int doff = (int)(imm & 0x800);
+                imm += doff;
+                assert(isValidSimm20(imm >> 12));
+
+                doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit.
+
+                code          = 0x1c000000;
+                *(code_t*)dst = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7);
+                dst += 4;
+                dst2 = dst;
+#ifdef DEBUG
+                code = emitInsCode(INS_pcaddu12i);
+                assert(code == 0x1c000000);
+                code = emitInsCode(INS_addi_d);
+                assert(code == 0x02c00000);
+#endif
+                *(code_t*)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1 << 5) | ((doff & 0xfff) << 10);
+                ins           = INS_addi_d;
+            }
+            else
+            {
+                ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock;
+                //assert((imm >> 32) <= 0x7ffff); // In fact max is <= 0xffff
+                assert((imm >> 32) == 0xff);
+
+                code = emitInsCode(INS_lu12i_w);
+                D_INST_lu12i_w(code, REG_R21, imm >> 12);
+                *(code_t*)dst = code;
+                dst += 4;
+
+                code = emitInsCode(INS_ori);
+                D_INST_ori(code, reg1, REG_R21, imm);
+                *(code_t*)dst = code;
+                dst += 4;
+                dst2 = dst;
+
+                ins = INS_lu32i_d;
+                //emitIns_R_I(INS_lu32i_d, size, reg1, 0xff);
+                code = emitInsCode(INS_lu32i_d);
+                //D_INST_lu32i_d(code, reg1, imm >> 32);
+                D_INST_lu32i_d(code, reg1, 0xff);
+                *(code_t*)dst = code;
+            }
+
+            dst += 4;
+
+            sz = sizeof(instrDesc);
+        }
+        break;
+        case INS_OPTS_JIRL:
+            // case_1: <----------from INS_OPTS_J:
+            //   xor r21,reg1,reg2  |  bne/beq _next  |  bcnez/bceqz _next
+            //   bnez/beqz dst      |  b dst          |  b dst
+            // _next:
+            //
+            // case_2: <---------- TODO: from INS_OPTS_J:
+            //   bnez/beqz _next:
+            //   pcaddi r21,off-hi
+            //   jirl r0,r21,off-lo
+            // _next:
+            //
+            // case_3: <----------INS_OPTS_JIRL: //not used by now !!!
+            //   b dst
+            //
+            // case_4: <----------INS_OPTS_JIRL: //not used by now !!!
+            //   pcaddi r21,off-hi
+            //   jirl r0,r21,off-lo
+            //
+            {
+                instrDescJmp* jmp = (instrDescJmp*)id;
+
+                regNumber reg1 = id->idReg1();
+                {
+                    ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset();
+                    // -4: presumably adjusts for the extra leading instruction
+                    // of the expanded two-instruction sequence — see above.
+                    imm -= 4;
+
+                    ins = jmp->idIns();
+                    assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!!
+                    switch (jmp->idCodeSize())
+                    {
+                        case 8:
+                        {
+                            regNumber reg2 = id->idReg2();
+                            assert((INS_bceqz <= ins) && (ins <= INS_bgeu));
+                            //assert((INS_bceqz <= ins) && (ins <= INS_bl)); // TODO
+                            if ((INS_beq == ins) || (INS_bne == ins))
+                            {
+                                if ((-0x400000 <= imm) && (imm < 0x400000))
+                                {
+                                    code = emitInsCode(INS_xor);
+                                    D_INST_3R(code, REG_R21, reg1, reg2);
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+
+                                    code = emitInsCode(ins == INS_beq ? INS_beqz : INS_bnez);
+                                    D_INST_Bcond_Z(code, REG_R21, imm);
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+                                }
+                                else // if ((-0x8000000 <= imm) && (imm < 0x8000000))
+                                {
+                                    assert((-0x8000000 <= imm) && (imm < 0x8000000));
+                                    assert((INS_bne & 0xfffe) == INS_beq);
+
+                                    // Invert the condition (ins ^ 1) and branch
+                                    // over the following unconditional 'b'
+                                    // (0x800 looks like a +8-byte branch offset
+                                    // in the encoding — verify).
+                                    code = emitInsCode((instruction)((int)ins ^ 0x1));
+                                    code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */
+                                    code |= ((code_t)(reg2) /*& 0x1f */);     /* rd */
+                                    code |= 0x800;
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+
+                                    code = emitInsCode(INS_b);
+                                    D_INST_B(code, imm);
+                                    *(code_t*)dst = code;
+                                    dst += 4;
+                                }
+                                //else
+                                //    unreached();
+                            }
+                            else if ((INS_bceqz == ins) || (INS_bcnez == ins))
+                            {
+                                assert((-0x8000000 <= imm) && (imm < 0x8000000));
+                                assert((INS_bcnez & 0xfffe) == INS_bceqz);
+
+                                code = emitInsCode((instruction)((int)ins ^ 0x1));
+                                code |= ((code_t)reg1) << 5; /* rj */
+                                code |= 0x800;
+                                *(code_t*)dst = code;
+                                dst += 4;
+
+                                code = emitInsCode(INS_b);
+                                D_INST_B(code, imm);
+                                *(code_t*)dst = code;
+                                dst += 4;
+                            }
+                            else if ((INS_blt <= ins) && (ins <= INS_bgeu))
+                            {
+                                assert((-0x8000000 <= imm) && (imm < 0x8000000));
+                                assert((INS_bge & 0xfffe) == INS_blt);
+                                assert((INS_bgeu & 0xfffe) == INS_bltu);
+
+                                code = emitInsCode((instruction)((int)ins ^ 0x1));
+                                code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */
+                                code |= ((code_t)(reg2) /*& 0x1f */);     /* rd */
+                                code |= 0x800;
+                                *(code_t*)dst = code;
+                                dst += 4;
+
+                                code = emitInsCode(INS_b);
+                                D_INST_B(code, imm);
+                                *(code_t*)dst = code;
+                                dst += 4;
+                            }
+                            break;
+                        }
+                        //case 12:
+                        default:
+                            unreached();
+                            break;
+                    }
+                }
+                sz = sizeof(instrDescJmp);
+            }
+            break;
+        case INS_OPTS_J_cond:
+            // b_cond  dst-relative.
+            //
+            // NOTE:
+            //   the case "imm > 0x7fff" not supported.
+            //   More info within the emitter::emitIns_J_cond_la();
+            {
+                ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot.
+                assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2));
+                assert(!(imm & 3));
+
+                ins  = id->idIns();
+                code = emitInsCode(ins);
+                D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm);
+                *(code_t*)dst = code;
+                dst += 4;
+
+                sz = sizeof(instrDescJmp);
+            }
+            break;
+        case INS_OPTS_J:
+            // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl  dst-relative.
+            {
+                ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot.
+                assert(!(imm & 3));
+
+                ins  = id->idIns();
+                code = emitInsCode(ins);
+                if (ins == INS_b || ins == INS_bl)
+                {
+                    D_INST_B(code, imm);
+                }
+                else if (ins == INS_bnez || ins == INS_beqz)
+                {
+                    D_INST_Bcond_Z(code, id->idReg1(), imm);
+                }
+                else if (ins == INS_bcnez || ins == INS_bceqz)
+                {
+                    assert((code_t)(id->idReg1()) < 8); // cc
+                    D_INST_Bcond_Z(code, id->idReg1(), imm);
+                }
+                else if ((INS_beq <= ins) && (ins <= INS_bgeu))
+                {
+                    D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm);
+                }
+                else
+                {
+                    assert(!"unimplemented on LOONGARCH yet");
+                }
+                *(code_t*)dst = code;
+                dst += 4;
+
+                sz = sizeof(instrDescJmp);
+            }
+            break;
+
+        case INS_OPTS_C:
+            if (id->idIsLargeCall())
+            {
+                /* Must be a "fat" call descriptor */
+                sz = sizeof(instrDescCGCA);
+            }
+            else
+            {
+                assert(!id->idIsLargeDsp());
+                assert(!id->idIsLargeCns());
+                sz = sizeof(instrDesc);
+            }
+            dst += emitOutputCall(ig, dst, id, 0);
+            ins = INS_nop;
+            break;
+
+        //case INS_OPTS_NONE:
+        default:
+            //assert(id->idGCref() == GCT_NONE);
+            *(code_t*)dst = id->idAddr()->iiaGetInstrEncode();
+            dst += 4;
+            dst2 = dst;
+            ins  = id->idIns();
+            sz   = emitSizeOfInsDsc(id);
+            break;
+    }
+
+    // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+    // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a
+    // GC ref to register "id->idReg1()".  (It may, apparently, also not be GC_NONE in other cases, such as
+    // for stores, but we ignore those cases here.)
+    if (emitInsMayWriteToGCReg(ins)) // True if "id->idIns()" writes to a register than can hold GC ref.
+    {
+        // We assume that "idReg1" is the primary destination register for all instructions
+        if (id->idGCref() != GCT_NONE)
+        {
+            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst2);
+        }
+        else
+        {
+            emitGCregDeadUpd(id->idReg1(), dst2);
+        }
+
+        //if (emitInsMayWriteMultipleRegs(id))
+        //{
+        //    // INS_gslq etc...
+        //    // "idReg2" is the secondary destination register
+        //    if (id->idGCrefReg2() != GCT_NONE)
+        //    {
+        //        emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), *dp);
+        //    }
+        //    else
+        //    {
+        //        emitGCregDeadUpd(id->idReg2(), *dp);
+        //    }
+        //}
+    }
+
+    // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+    // ref or overwritten one.
+    if (emitInsWritesToLclVarStackLoc(id) /*|| emitInsWritesToLclVarStackLocPair(id)*/)
+    {
+        int      varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+        unsigned ofs    = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
+        bool     FPbased;
+        int      adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+        if (id->idGCref() != GCT_NONE)
+        {
+            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst2 DEBUG_ARG(varNum));
+        }
+        else
+        {
+            // If the type of the local is a gc ref type, update the liveness.
+            var_types vt;
+            if (varNum >= 0)
+            {
+                // "Regular" (non-spill-temp) local.
+                vt = var_types(emitComp->lvaTable[varNum].lvType);
+            }
+            else
+            {
+                TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
+                vt              = tmpDsc->tdTempType();
+            }
+            if (vt == TYP_REF || vt == TYP_BYREF)
+                emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
+        }
+        //if (emitInsWritesToLclVarStackLocPair(id))
+        //{
+        //    unsigned ofs2 = ofs + TARGET_POINTER_SIZE;
+        //    if (id->idGCrefReg2() != GCT_NONE)
+        //    {
+        //        emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp);
+        //    }
+        //    else
+        //    {
+        //        // If the type of the local is a gc ref type, update the liveness.
+        //        var_types vt;
+        //        if (varNum >= 0)
+        //        {
+        //            // "Regular" (non-spill-temp) local.
+        //            vt = var_types(emitComp->lvaTable[varNum].lvType);
+        //        }
+        //        else
+        //        {
+        //            TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
+        //            vt              = tmpDsc->tdTempType();
+        //        }
+        //        if (vt == TYP_REF || vt == TYP_BYREF)
+        //            emitGCvarDeadUpd(adr + ofs2, *dp);
+        //    }
+        //}
+    }
+
+#ifdef DEBUG
+    /* Make sure we set the instruction descriptor size correctly */
+
+    //size_t expected = emitSizeOfInsDsc(id);
+    //assert(sz == expected);
+
+    if (emitComp->opts.disAsm || emitComp->verbose)
+    {
+        code_t* cp = (code_t*)*dp;
+        while ((BYTE*)cp != dst)
+        {
+            emitDisInsName(*cp, (BYTE*)cp, id);
+            cp++;
+        }
+        //emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+    }
+
+    if (emitComp->compDebugBreak)
+    {
+        // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+        // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } +#endif + + /* All instructions are expected to generate code */ + + assert(*dp != dst); + + *dp = dst; + + return sz; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +#ifdef DEBUG + +/**************************************************************************** + * + * Display the given instruction. + */ + +//NOTE: At least 32bytes within dst. +void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) +{ + const BYTE* insstrs = dst; + + if (!code) + { + printf("LOONGARCH invalid instruction: 0x%x\n", code); + assert(!"invalid inscode on LOONGARCH!"); + return ; + } + +// clang-format off + const char * const regName[] = {"zero", "ra", "tp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "x0", "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"}; + + const char * const FregName[] = {"fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}; + + const char * const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; +// clang-format on + + + unsigned int opcode = (code>>26) & 0x3f; + + //bits: 31-26,MSB6 + switch (opcode) + { + case 0x0: + { + goto Label_OPCODE_0; + //break; + } + //case 0x1: + //{ + // assert(!"unimplemented on loongarch yet!"); + // //goto Label_OPCODE_1; + // break; + //} + case 0x2: + { + goto Label_OPCODE_2; + //break; + } + case 0x3: + { + goto Label_OPCODE_3; + //break; + } + case 0xe: + { + goto Label_OPCODE_E; + //break; + } + case LA_2RI16_ADDU16I_D: //0x4 + { + const char *rd = regName[code & 0x1f]; + 
const char *rj = regName[(code>>5) & 0x1f]; + short si16 = (code >> 10) & 0xffff; + printf(" 0x%llx addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); + return; + } + case 0x5: + case 0x6: + case 0x7: + { + //bits: 31-25,MSB7 + unsigned int inscode = (code >> 25) & 0x7f; + const char *rd = regName[code & 0x1f]; + unsigned int si20 = (code >> 5) & 0xfffff; + switch (inscode) + { + case LA_1RI20_LU12I_W: + printf(" 0x%llx lu12i.w %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_LU32I_D: + printf(" 0x%llx lu32i.d %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCADDI: + printf(" 0x%llx pcaddi %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCALAU12I: + printf(" 0x%llx pcalau12i %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCADDU12I: + printf(" 0x%llx pcaddu12i %s, 0x%x\n", insstrs, rd, si20); + return; + case LA_1RI20_PCADDU18I: + { + printf(" 0x%llx pcaddu18i %s, 0x%x\n", insstrs, rd, si20); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x8: + case 0x9: + { + //bits: 31-24,MSB8 + unsigned int inscode = (code >> 24) & 0xff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si14 = ((code >> 10) & 0x3fff)<<2; + si14 >>= 2; + switch (inscode) + { + case LA_2RI14_LL_W: + printf(" 0x%llx ll.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_SC_W: + printf(" 0x%llx sc.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_LL_D: + printf(" 0x%llx ll.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_SC_D: + printf(" 0x%llx sc.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_LDPTR_W: + printf(" 0x%llx ldptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_STPTR_W: + printf(" 0x%llx stptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + return; + case LA_2RI14_LDPTR_D: + printf(" 0x%llx ldptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + 
case LA_2RI14_STPTR_D: + printf(" 0x%llx stptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0xa: + { + //bits: 31-24,MSB8 + unsigned int inscode = (code >> 22) & 0x3ff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *fd = FregName[code & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + switch (inscode) + { + case LA_2RI12_LD_B: + printf(" 0x%llx ld.b %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_H: + printf(" 0x%llx ld.h %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_W: + printf(" 0x%llx ld.w %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_D: + printf(" 0x%llx ld.d %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_B: + printf(" 0x%llx st.b %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_H: + printf(" 0x%llx st.h %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_W: + printf(" 0x%llx st.w %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_ST_D: + printf(" 0x%llx st.d %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_BU: + printf(" 0x%llx ld.bu %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_HU: + printf(" 0x%llx ld.hu %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_LD_WU: + printf(" 0x%llx ld.wu %s, %s, %d\n", insstrs, rd, rj, si12); + return; + case LA_2RI12_PRELD: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_2RI12_FLD_S: + printf(" 0x%llx fld.s %s, %s, %d\n", insstrs, fd, rj, si12); + return; + case LA_2RI12_FST_S: + printf(" 0x%llx fst.s %s, %s, %d\n", insstrs, fd, rj, si12); + return; + case LA_2RI12_FLD_D: + printf(" 0x%llx fld.d %s, %s, %d\n", insstrs, fd, rj, si12); + return; + case LA_2RI12_FST_D: + printf(" 0x%llx fst.d %s, %s, %d\n", insstrs, fd, rj, si12); + return; + 
default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_1RI21_BEQZ: //0x10 + { + const char *rj = regName[(code>>5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + offs21 >>= 9; + printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + return; + } + case LA_1RI21_BNEZ: //0x11 + { + const char *rj = regName[(code>>5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + offs21 >>= 9; + printf(" 0x%llx bnez %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + return; + } + case 0x12: + { + //LA_1RI21_BCEQZ + //LA_1RI21_BCNEZ + const char *cj = CFregName[(code>>5) & 0x7]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + offs21 >>= 9; + if (0 == ((code>>8) & 0x3)) { + printf(" 0x%llx bceqz %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + return; + } + else if (1 == ((code>>8) & 0x3)) { + printf(" 0x%llx bcnez %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + return; + } + else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_2RI16_JIRL: //0x13 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + if(id->idDebugOnlyInfo()->idMemCookie) + { + assert(0 < id->idDebugOnlyInfo()->idMemCookie); + const char* methodName; + methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); + printf(" 0x%llx jirl %s, %s, %d #%s\n", insstrs, rd, rj, offs16, methodName); + } + else + { + printf(" 0x%llx jirl %s, %s, %d\n", insstrs, rd, rj, offs16); + } + return; + } + case LA_I26_B: //0x14 + { + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16))<<6; + offs26 >>= 4; + printf(" 0x%llx b 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + return; + } + case LA_I26_BL: //0x15 + { + int offs26 = (((code >> 10) & 
0xffff) | ((code & 0x3ff) << 16))<<6; + offs26 >>= 4; + printf(" 0x%llx bl 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + return; + } + case LA_2RI16_BEQ: //0x16 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BNE: //0x17 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BLT: //0x18 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BGE: //0x19 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BLTU: //0x1a + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + case LA_2RI16_BGEU: //0x1b + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); + offs16 <<= 2; + printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + return; + } + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + +Label_OPCODE_0: + opcode = (code >> 22) & 0x3ff; + + //bits: 31-22,MSB10 + 
switch (opcode) + { + case 0x0: + { + //bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x0: + { + //bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + switch (inscode2) + { + case 0x0: + { + //bits:31-10,MSB22 + unsigned int inscode3 = (code >> 10) & 0x3fffff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + switch (inscode3) + { + case LA_2R_CLO_W: + printf(" 0x%llx clo.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CLZ_W: + printf(" 0x%llx clz.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTO_W: + printf(" 0x%llx cto.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTZ_W: + printf(" 0x%llx ctz.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CLO_D: + printf(" 0x%llx clo.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CLZ_D: + printf(" 0x%llx clz.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTO_D: + printf(" 0x%llx cto.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CTZ_D: + printf(" 0x%llx ctz.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_2H: + printf(" 0x%llx revb.2h %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_4H: + printf(" 0x%llx revb.4h %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_2W: + printf(" 0x%llx revb.2w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVB_D: + printf(" 0x%llx revb.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVH_2W: + printf(" 0x%llx revh.2w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_REVH_D: + printf(" 0x%llx revh.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_4B: + printf(" 0x%llx bitrev.4b %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_8B: + printf(" 0x%llx bitrev.8b %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_W: + printf(" 0x%llx bitrev.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_BITREV_D: + printf(" 0x%llx bitrev.d %s, %s\n", insstrs, rd, rj); + return; + case 
LA_2R_EXT_W_H: + printf(" 0x%llx ext.w.h %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_EXT_W_B: + printf(" 0x%llx ext.w.b %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_RDTIMEL_W: + printf(" 0x%llx rdtimel.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_RDTIMEH_W: + printf(" 0x%llx rdtimeh.w %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_RDTIME_D: + printf(" 0x%llx rdtime.d %s, %s\n", insstrs, rd, rj); + return; + case LA_2R_CPUCFG: + printf(" 0x%llx cpucfg %s, %s\n", insstrs, rd, rj); + return; + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_2R_ASRTLE_D: + { + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); + return; + } + case LA_2R_ASRTGT_D: + { + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x1: + { + //LA_OP_ALSL_W + //LA_OP_ALSL_WU + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa2 = (code>>15) & 0x3; + if (0 == ((code>>17) & 0x1)) { + printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + return; + } else if (1 == ((code>>17) & 0x1)) { + printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_OP_BYTEPICK_W: //0x2 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa2 = (code>>15) & 0x3; + printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); + return; + } + case LA_OP_BYTEPICK_D: //0x3 + 
{ + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa3 = (code>>15) & 0x7; + printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); + return; + } + case 0x4: + case 0x5: + case 0x6: + case 0x7: + case 0x8: + case 0x9: + { + //bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + + switch (inscode2) + { + case LA_3R_ADD_W: + printf(" 0x%llx add.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ADD_D: + printf(" 0x%llx add.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SUB_W: + printf(" 0x%llx sub.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SUB_D: + printf(" 0x%llx sub.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLT: + printf(" 0x%llx slt %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLTU: + printf(" 0x%llx sltu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MASKEQZ: + printf(" 0x%llx maskeqz %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MASKNEZ: + printf(" 0x%llx masknez %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_NOR: + printf(" 0x%llx nor %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_AND: + printf(" 0x%llx and %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_OR: + printf(" 0x%llx or %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_XOR: + printf(" 0x%llx xor %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ORN: + printf(" 0x%llx orn %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ANDN: + printf(" 0x%llx andn %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLL_W: + printf(" 0x%llx sll.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SRL_W: + printf(" 0x%llx srl.w %s, %s, %s\n", insstrs, rd, rj, rk); + 
return; + case LA_3R_SRA_W: + printf(" 0x%llx sra.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SLL_D: + printf(" 0x%llx sll.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SRL_D: + printf(" 0x%llx srl.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_SRA_D: + printf(" 0x%llx sra.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ROTR_W: + printf(" 0x%llx rotr.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_ROTR_D: + printf(" 0x%llx rotr.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MUL_W: + printf(" 0x%llx mul.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_W: + printf(" 0x%llx mulh.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_WU: + printf(" 0x%llx mulh.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MUL_D: + printf(" 0x%llx mul.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_D: + printf(" 0x%llx mulh.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULH_DU: + printf(" 0x%llx mulh.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULW_D_W: + printf(" 0x%llx mulw.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MULW_D_WU: + printf(" 0x%llx mulw.d.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_W: + printf(" 0x%llx div.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_W: + printf(" 0x%llx mod.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_WU: + printf(" 0x%llx div.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_WU: + printf(" 0x%llx mod.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_D: + printf(" 0x%llx div.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_D: + printf(" 0x%llx mod.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_DIV_DU: + printf(" 0x%llx div.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_MOD_DU: + printf(" 0x%llx 
mod.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_B_W: + printf(" 0x%llx crc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_H_W: + printf(" 0x%llx crc.w.h.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_W_W: + printf(" 0x%llx crc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRC_W_D_W: + printf(" 0x%llx crc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_B_W: + printf(" 0x%llx crcc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_H_W: + printf(" 0x%llx crcc.w.h.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_W_W: + printf(" 0x%llx crcc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + case LA_3R_CRCC_W_D_W: + printf(" 0x%llx crcc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case 0xa: + { + //bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; + unsigned int codefield = code & 0x7fff; + switch (inscode2) + { + case LA_OP_BREAK: + printf(" 0x%llx break 0x%x\n", insstrs, codefield); + return; + case LA_OP_DBGCALL: + printf(" 0x%llx dbgcall 0x%x\n", insstrs, codefield); + return; + case LA_OP_SYSCALL: + printf(" 0x%llx syscall 0x%x\n", insstrs, codefield); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case LA_OP_ALSL_D: //0xb + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + unsigned int sa2 = (code>>15) & 0x3; + printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x1: + { + if (code & 0x200000) { + //LA_OP_BSTRINS_W + //LA_OP_BSTRPICK_W + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 
0x1f]; + unsigned int lsbw = (code >> 10) & 0x1f; + unsigned int msbw = (code >> 16) & 0x1f; + if (!(code & 0x8000)) { + printf(" 0x%llx bstrins.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + return; + } else if (code & 0x8000) { + printf(" 0x%llx bstrpick.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + else { + //bits: 31-18,MSB14 + unsigned int inscode1 = (code >> 18) & 0x3fff; + switch (inscode1) + { + case 0x10: + { + //LA_OP_SLLI_W: + //LA_OP_SLLI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx slli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx slli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x11: + { + //LA_OP_SRLI_W: + //LA_OP_SRLI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx srli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx srli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x12: + { + //LA_OP_SRAI_W: + //LA_OP_SRAI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx srai.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx srai.d %s, %s, %d\n", 
insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case 0x13: + { + //LA_OP_ROTRI_W: + //LA_OP_ROTRI_D: + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + if (1 == ((code>>15) & 0x7)) { + unsigned int ui5 = (code>>10) & 0x1f; + printf(" 0x%llx rotri.w %s, %s, %d\n", insstrs, rd, rj, ui5); + return; + } else if (1 == ((code>>16) & 0x3)) { + unsigned int ui6 = (code>>10) & 0x3f; + printf(" 0x%llx rotri.d %s, %s, %d\n", insstrs, rd, rj, ui6); + return; + } else { + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + return; + } + case LA_OP_BSTRINS_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + return; + } + case LA_OP_BSTRPICK_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int lsbd = (code >> 10) & 0x3f; + unsigned int msbd = (code >> 16) & 0x3f; + printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + return; + } + case 0x4: + { + //bits: 31-15,MSB17 + unsigned int inscode1 = (code >> 15) & 0x1ffff; + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + + switch (inscode1) + { + case LA_3R_FADD_S: + printf(" 0x%llx fadd.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FADD_D: + printf(" 0x%llx fadd.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FSUB_S: + printf(" 0x%llx fsub.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case 
LA_3R_FSUB_D: + printf(" 0x%llx fsub.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMUL_S: + printf(" 0x%llx fmul.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMUL_D: + printf(" 0x%llx fmul.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FDIV_S: + printf(" 0x%llx fdiv.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FDIV_D: + printf(" 0x%llx fdiv.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAX_S: + printf(" 0x%llx fmax.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAX_D: + printf(" 0x%llx fmax.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMIN_S: + printf(" 0x%llx fmin.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMIN_D: + printf(" 0x%llx fmin.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAXA_S: + printf(" 0x%llx fmaxa.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMAXA_D: + printf(" 0x%llx fmaxa.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMINA_S: + printf(" 0x%llx fmina.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FMINA_D: + printf(" 0x%llx fmina.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FSCALEB_S: + printf(" 0x%llx fscaleb.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FSCALEB_D: + printf(" 0x%llx fscaleb.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_S: + printf(" 0x%llx fcopysign.s %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case LA_3R_FCOPYSIGN_D: + printf(" 0x%llx fcopysign.d %s, %s, %s\n", insstrs, fd, fj, fk); + return; + case 0x228: + case 0x229: + case 0x232: + case 0x234: + case 0x235: + case 0x236: + case 0x23a: + case 0x23c: + { + //bits:31-10,MSB22 + unsigned int inscode2 = (code >> 10) & 0x3fffff; + switch (inscode2) + { + case LA_2R_FABS_S: + printf(" 0x%llx fabs.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FABS_D: + printf(" 0x%llx fabs.d %s, %s\n", insstrs, fd, fj); + return; + 
case LA_2R_FNEG_S: + printf(" 0x%llx fneg.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FNEG_D: + printf(" 0x%llx fneg.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FLOGB_S: + printf(" 0x%llx flogb.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FLOGB_D: + printf(" 0x%llx flogb.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FCLASS_S: + printf(" 0x%llx fclass.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FCLASS_D: + printf(" 0x%llx fclass.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FSQRT_S: + printf(" 0x%llx fsqrt.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FSQRT_D: + printf(" 0x%llx fsqrt.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRECIP_S: + printf(" 0x%llx frecip.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRECIP_D: + printf(" 0x%llx frecip.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRSQRT_S: + printf(" 0x%llx frsqrt.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRSQRT_D: + printf(" 0x%llx frsqrt.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FMOV_S: + printf(" 0x%llx fmov.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FMOV_D: + printf(" 0x%llx fmov.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_MOVGR2FR_W: + printf(" 0x%llx movgr2fr.w %s, %s\n", insstrs, fd, rj); + return; + case LA_2R_MOVGR2FR_D: + printf(" 0x%llx movgr2fr.d %s, %s\n", insstrs, fd, rj); + return; + case LA_2R_MOVGR2FRH_W: + printf(" 0x%llx movgr2frh.w %s, %s\n", insstrs, fd, rj); + return; + case LA_2R_MOVFR2GR_S: + printf(" 0x%llx movfr2gr.s %s, %s\n", insstrs, rd, fj); + return; + case LA_2R_MOVFR2GR_D: + printf(" 0x%llx movfr2gr.d %s, %s\n", insstrs, rd, fj); + return; + case LA_2R_MOVFRH2GR_S: + printf(" 0x%llx movfrh2gr.s %s, %s\n", insstrs, rd, fj); + return; + case LA_2R_MOVGR2FCSR: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_2R_MOVFCSR2GR: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_2R_MOVFR2CF: + { + const char *cd = 
CFregName[code & 0x7]; + printf(" 0x%llx movfr2cf %s, %s\n", insstrs, cd, fj); + return; + } + case LA_2R_MOVCF2FR: + { + const char *cj = CFregName[(code>>5) & 0x7]; + printf(" 0x%llx movcf2fr %s, %s\n", insstrs, fd, cj); + return; + } + case LA_2R_MOVGR2CF: + { + const char *cd = CFregName[code & 0x7]; + printf(" 0x%llx movgr2cf %s, %s\n", insstrs, cd, rj); + return; + } + case LA_2R_MOVCF2GR: + { + const char *cj = CFregName[(code>>5) & 0x7]; + printf(" 0x%llx movcf2gr %s, %s\n", insstrs, rd, cj); + return; + } + case LA_2R_FCVT_S_D: + printf(" 0x%llx fcvt.s.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FCVT_D_S: + printf(" 0x%llx fcvt.d.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_W_S: + printf(" 0x%llx ftintrm.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_W_D: + printf(" 0x%llx ftintrm.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_L_S: + printf(" 0x%llx ftintrm.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRM_L_D: + printf(" 0x%llx ftintrm.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_W_S: + printf(" 0x%llx ftintrp.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_W_D: + printf(" 0x%llx ftintrp.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_L_S: + printf(" 0x%llx ftintrp.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRP_L_D: + printf(" 0x%llx ftintrp.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_W_S: + printf(" 0x%llx ftintrz.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_W_D: + printf(" 0x%llx ftintrz.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_L_S: + printf(" 0x%llx ftintrz.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRZ_L_D: + printf(" 0x%llx ftintrz.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_W_S: + printf(" 0x%llx ftintrne.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_W_D: + printf(" 0x%llx ftintrne.w.d %s, 
%s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_L_S: + printf(" 0x%llx ftintrne.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINTRNE_L_D: + printf(" 0x%llx ftintrne.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_W_S: + printf(" 0x%llx ftint.w.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_W_D: + printf(" 0x%llx ftint.w.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_L_S: + printf(" 0x%llx ftint.l.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FTINT_L_D: + printf(" 0x%llx ftint.l.d %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_S_W: + printf(" 0x%llx ffint.s.w %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_S_L: + printf(" 0x%llx ffint.s.l %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_D_W: + printf(" 0x%llx ffint.d.w %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FFINT_D_L: + printf(" 0x%llx ffint.d.l %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRINT_S: + printf(" 0x%llx frint.s %s, %s\n", insstrs, fd, fj); + return; + case LA_2R_FRINT_D: + printf(" 0x%llx frint.d %s, %s\n", insstrs, fd, fj); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + return; + } + case LA_2RI12_SLTI: //0x8 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_SLTUI: //0x9 + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_ADDI_W: //0xa + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + 
si12 >>= 4; + printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_ADDI_D: //0xb + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff)<<4; + si12 >>= 4; + printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_LU52I_D: //0xc + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int si12 = (code >> 10) & 0xfff; + printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); + return; + } + case LA_2RI12_ANDI: //0xd + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + return; + } + case LA_2RI12_ORI: //0xe + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + return; + } + case LA_2RI12_XORI: //0xf + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + return; + } + + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + +//Label_OPCODE_1: +// opcode = (code >> 24) & 0xff; +// //bits: 31-24,MSB8 + + +Label_OPCODE_2: + opcode = (code >> 20) & 0xfff; + + //bits: 31-20,MSB12 + switch (opcode) + { + case LA_4R_FMADD_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FMADD_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + 
const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FMSUB_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMADD_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_S: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + case LA_4R_FNMSUB_D: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *fa = FregName[(code>>15) & 0x1f]; + printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + return; + } + default : + printf("LOONGARCH 
illegal instruction: 0x%08x\n", code); + return; + } + +Label_OPCODE_3: + opcode = (code >> 20) & 0xfff; + + //bits: 31-20,MSB12 + switch (opcode) + { + case LA_OP_FCMP_cond_S: + { + //bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char *cd = CFregName[code & 0x7]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + switch (cond) + { + case 0x0: + printf(" 0x%llx fcmp.caf.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x1: + printf(" 0x%llx fcmp.saf.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x2: + printf(" 0x%llx fcmp.clt.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x3: + printf(" 0x%llx fcmp.slt.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x4: + printf(" 0x%llx fcmp.ceq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x5: + printf(" 0x%llx fcmp.seq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x6: + printf(" 0x%llx fcmp.cle.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x7: + printf(" 0x%llx fcmp.sle.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x8: + printf(" 0x%llx fcmp.cun.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x9: + printf(" 0x%llx fcmp.sun.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xA: + printf(" 0x%llx fcmp.cult.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xB: + printf(" 0x%llx fcmp.sult.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xC: + printf(" 0x%llx fcmp.cueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xD: + printf(" 0x%llx fcmp.sueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xE: + printf(" 0x%llx fcmp.cule.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xF: + printf(" 0x%llx fcmp.sule.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x10: + printf(" 0x%llx fcmp.cne.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x11: + printf(" 0x%llx fcmp.sne.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 
0x14: + printf(" 0x%llx fcmp.cor.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x15: + printf(" 0x%llx fcmp.sor.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x18: + printf(" 0x%llx fcmp.cune.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x19: + printf(" 0x%llx fcmp.sune.s %s, %s, %s\n", insstrs, cd, fj, fk); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case LA_OP_FCMP_cond_D: + { + //bits:19-15,cond + unsigned int cond = (code >> 15) & 0x1f; + const char *cd = CFregName[code & 0x7]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + switch (cond) + { + case 0x0: + printf(" 0x%llx fcmp.caf.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x1: + printf(" 0x%llx fcmp.saf.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x2: + printf(" 0x%llx fcmp.clt.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x3: + printf(" 0x%llx fcmp.slt.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x4: + printf(" 0x%llx fcmp.ceq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x5: + printf(" 0x%llx fcmp.seq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x6: + printf(" 0x%llx fcmp.cle.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x7: + printf(" 0x%llx fcmp.sle.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x8: + printf(" 0x%llx fcmp.cun.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x9: + printf(" 0x%llx fcmp.sun.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xA: + printf(" 0x%llx fcmp.cult.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xB: + printf(" 0x%llx fcmp.sult.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xC: + printf(" 0x%llx fcmp.cueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xD: + printf(" 0x%llx fcmp.sueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0xE: + printf(" 0x%llx fcmp.cule.d %s, %s, %s\n", insstrs, cd, fj, 
fk); + return; + case 0xF: + printf(" 0x%llx fcmp.sule.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x10: + printf(" 0x%llx fcmp.cne.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x11: + printf(" 0x%llx fcmp.sne.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x14: + printf(" 0x%llx fcmp.cor.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x15: + printf(" 0x%llx fcmp.sor.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x18: + printf(" 0x%llx fcmp.cune.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + case 0x19: + printf(" 0x%llx fcmp.sune.d %s, %s, %s\n", insstrs, cd, fj, fk); + return; + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + } + case LA_4R_FSEL: + { + const char *fd = FregName[code & 0x1f]; + const char *fj = FregName[(code>>5) & 0x1f]; + const char *fk = FregName[(code>>10) & 0x1f]; + const char *ca = CFregName[(code>>15) & 0x7]; + printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } + +Label_OPCODE_E: + opcode = (code >> 15) & 0x1ffff; + + //bits: 31-15,MSB17 + switch (opcode) + { + case LA_3R_LDX_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = 
regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STX_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_BU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_HU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDX_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_PRELDX: + assert(!"unimplemented on loongarch yet!"); + return; + case LA_3R_FLDX_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = 
regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDX_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTX_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTX_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_AMSWAP_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMSWAP_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMAND_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + 
} + case LA_3R_AMAND_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 
0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMSWAP_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMADD_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 
0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMAND_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMOR_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMXOR_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + 
return; + } + case LA_3R_AMMIN_DB_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMAX_DB_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_WU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_AMMIN_DB_DU: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_OP_DBAR: + { + unsigned int hint = code & 0x7fff; + printf(" 0x%llx dbar 0x%x\n", insstrs, hint); + return; + } + case LA_OP_IBAR: + { + unsigned int hint = code & 0x7fff; + printf(" 0x%llx ibar 0x%x\n", insstrs, hint); + return; + } + case LA_3R_FLDGT_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDGT_D: + { + const char 
*fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDLE_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FLDLE_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTGT_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTGT_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTLE_S: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_FSTLE_D: + { + const char *fd = FregName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); + return; + } + case LA_3R_LDGT_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDGT_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = 
regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDGT_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDGT_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_LDLE_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STGT_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STGT_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case 
LA_3R_STGT_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STGT_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_B: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_H: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_W: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + case LA_3R_STLE_D: + { + const char *rd = regName[code & 0x1f]; + const char *rj = regName[(code>>5) & 0x1f]; + const char *rk = regName[(code>>10) & 0x1f]; + printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); + return; + } + default : + printf("LOONGARCH illegal instruction: 0x%08x\n", code); + return; + } +} + +/***************************************************************************** + * + * Display (optionally) the instruction encoding in hex + */ + +void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +{ + // We do not display the instruction hex if we want diff-able disassembly + if (!emitComp->opts.disDiffable) + { + if (sz == 4) + { + printf(" %08X ", (*((code_t*)code))); + } + else + { + assert(sz == 0); + printf(" "); + } + } +} + +void emitter::emitDispIns( + instrDesc* id, bool 
isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{//not used on loongarch64. + printf("------------not implements emitDispIns() for loongarch64!!!\n"); +} + +/***************************************************************************** + * + * Display a stack frame reference. + */ + +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +{ + printf("["); + + if (varx < 0) + printf("TEMP_%02u", -varx); + else + emitComp->gtDispLclVar(+varx, false); + + if (disp < 0) + printf("-0x%02x", -disp); + else if (disp > 0) + printf("+0x%02x", +disp); + + printf("]"); + + if (varx >= 0 && emitComp->opts.varNames) + { + LclVarDsc* varDsc; + const char* varName; + + assert((unsigned)varx < emitComp->lvaCount); + varDsc = emitComp->lvaTable + varx; + varName = emitComp->compLocalVarName(varx, offs); + + if (varName) + { + printf("'%s", varName); + + if (disp < 0) + printf("-%d", -disp); + else if (disp > 0) + printf("+%d", +disp); + + printf("'"); + } + } +} + +#endif // DEBUG + +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*sccale + offset] +// Since LOONGARCH64 does not directly support this complex of an addressing mode +// we may generates up to three instructions for this for LOONGARCH64 +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +{ + GenTree* addr = indir->Addr(); + + if (addr->isContained()) + { + assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = addr->AsAddrMode()->Offset(); + if (addr->AsAddrMode()->gtScale > 0) + { + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); + } + } + + GenTree* memBase = indir->Base(); + emitAttr addType = 
varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE; + + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); + + if (offset != 0) + { + regNumber tmpReg = indir->GetSingleTempReg(); + + if (isValidSimm12(offset)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); + } + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); + } + else // large offset + { + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset);//codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, memBase->GetRegNum()); + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->GetRegNum()); + + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_I(INS_slli_d, addType, REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, REG_R21); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + else // (offset == 0) + { + // Then load/store dataReg from/to [memBase + index] + switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) + { + case EA_1BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_bu; + else + ins = INS_ldx_b; + } + else + ins = INS_stx_b; + break; + case EA_2BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= 
INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_hu; + else + ins = INS_ldx_h; + } + else + ins = INS_stx_h; + break; + case EA_4BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_s) || (ins == INS_fld_s)); + assert(INS_fst_s > INS_st_d); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_wu; + else + ins = INS_ldx_w; + } + else if (ins == INS_fld_s) + ins = INS_fldx_s; + else if (ins == INS_fst_s) + ins = INS_fstx_s; + else + ins = INS_stx_w; + break; + case EA_8BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_d) || (ins == INS_fld_d)); + assert(INS_fst_d > INS_st_d); + if (ins <= INS_ld_wu) + { + ins = INS_ldx_d; + } + else if (ins == INS_fld_d) + ins = INS_fldx_d; + else if (ins == INS_fst_d) + ins = INS_fstx_d; + else + ins = INS_stx_d; + break; + default: + assert(!"------------TODO for LOONGARCH64: unsupported ins."); + } + + if (lsl > 0) + { + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_I(INS_slli_d, emitActualTypeSize(index->TypeGet()), REG_R21, index->GetRegNum(), lsl); + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), REG_R21); + } + else // no scale + { + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum()); + } + } + } + else // no Index register + { + if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Get a temp integer register to compute long address. 
+ regNumber addrReg = indir->GetSingleTempReg(); + emitIns_R_C(ins, attr, dataReg, addrReg, addr->AsClsVar()->gtClsVarHnd, 0); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + unsigned offset = varNode->GetLclOffs(); + if (emitInsIsStore(ins)) + { + emitIns_S_R(ins, attr, dataReg, lclNum, offset); + } + else + { + emitIns_R_S(ins, attr, dataReg, lclNum, offset); + } + } + else if (isValidSimm12(offset)) + { + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); + } + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = indir->GetSingleTempReg(); + + // First load/store tmpReg with the large offset constant + emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + //codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), tmpReg); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, 0); + } + } + } + else // addr is not contained, so we evaluate it into a register + { +#ifdef DEBUG + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. 
+ GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); + assert(!varDsc->lvTracked); + } +#endif // DEBUG + // Then load/store dataReg from/to [addrReg] + emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); + } +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. + +regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +{ + assert(!"unimplemented on LOONGARCH yet"); + return REG_R0; +} + +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + // dst can only be a reg + assert(!dst->isContained()); + + // find immed (if any) - it cannot be a dst + // Only one src can be an int. + GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; + + bool needCheckOv = dst->gtOverflowEx(); + + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); + + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + 
} + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + + if (needCheckOv) + { + if (ins == INS_add_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_add_w)// || ins == INS_add + { + assert(attr == EA_4BYTE); + } + else if (ins == INS_addi_d) + { + assert(intConst != nullptr); + } + else if (ins == INS_addi_w) + { + assert(intConst != nullptr); + } + else if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + } + else if ((ins == INS_mul_d) || (ins == INS_mulh_d) || (ins == INS_mulh_du)) + { + assert(attr == EA_8BYTE); + //NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || (ins == INS_mulw_d_wu)) + { + assert(attr == EA_4BYTE); + //NOTE: overflow format doesn't support an int constant operand directly. + assert(intConst == nullptr); + } + else + { +#ifdef DEBUG + printf("LOONGARCH64-Invalid ins for overflow check: %s\n", codeGen->genInsName(ins)); +#endif + assert(!"Invalid ins for overflow check"); + } + } + + if (intConst != nullptr) + {//should re-design this case!!! ---2020.04.11. 
+ ssize_t imm = intConst->IconValue(); + if (ins == INS_andi || ins == INS_ori || ins == INS_xori) + //assert((0 <= imm) && (imm <= 0xfff)); + assert((-2048 <= imm) && (imm <= 0xfff)); + else + assert((-2049 < imm) && (imm < 2048)); + + if (ins == INS_sub_d) + { + assert(attr == EA_8BYTE); + assert(imm != -2048); + ins = INS_addi_d; + imm = -imm; + } + else if (ins == INS_sub_w) + { + assert(attr == EA_4BYTE); + assert(imm != -2048); + ins = INS_addi_w; + imm = -imm; + } + + assert(ins == INS_addi_d || ins == INS_addi_w || ins == INS_andi || ins == INS_ori || ins == INS_xori); + + if ((imm < 0) && (ins == INS_andi || ins == INS_ori || ins == INS_xori)) + { + assert(attr == EA_8BYTE || attr == EA_4BYTE); + assert(nonIntReg->GetRegNum() != REG_R21); + + emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); + + if (ins == INS_andi) + { + ins = INS_and; + } + else if (ins == INS_ori) + { + ins = INS_or; + } + else if (ins == INS_xori) + { + ins = INS_xor; + } + else + { + unreached(); + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), REG_R21, nonIntReg->GetRegNum()); + + goto L_Done; + } + + if (needCheckOv) + { + emitIns_R_R_R(INS_or, attr, REG_R21, nonIntReg->GetRegNum(), REG_R0); + } + + emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), imm); + + if (needCheckOv) + { + if (ins == INS_addi_d || ins == INS_addi_w) + { + // A = B + C + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, dst->GetRegNum(), nullptr, REG_R21); + } + else + { + if (imm > 0) + { + // B > 0 and C > 0, if A < B, goto overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R0, REG_R21); + emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_R21, dst->GetRegNum(), imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + + codeGen->genDefineTempLabel(tmpLabel); + } + else if (imm < 0) + { + // B < 0 and C < 0, if A > B, goto overflow + BasicBlock* tmpLabel = 
codeGen->genCreateTempLabel(); + emitIns_J_cond_la(INS_bge, tmpLabel, REG_R21, REG_R0); + emitIns_R_R_I(INS_addi_d, attr, REG_R21, REG_R0, imm); + + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_blt, REG_R21, nullptr, dst->GetRegNum()); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + else if (varTypeIsFloating(dst)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else if (dst->OperGet() == GT_MUL) + { + if (!needCheckOv && !(dst->gtFlags & GTF_UNSIGNED)) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + else + { + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + instruction ins2; + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + ins2 = INS_mulh_wu; + else + ins2 = INS_mulh_du; + } + else + { + if (attr == EA_8BYTE) + ins2 = INS_mulh_d; + else + ins2 = INS_mulh_w; + } + + emitIns_R_R_R(ins2, attr, REG_R21, src1->GetRegNum(), src2->GetRegNum()); + } + + // n * n bytes will store n bytes result + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); + //else + //{ + // assert(!"unimplemented on LOONGARCH yet: ulong * ulong !!!"); + //} + } + + if (needCheckOv) + { + assert(REG_R21 != dst->GetRegNum()); + assert(REG_R21 != src1->GetRegNum()); + assert(REG_R21 != src2->GetRegNum()); + + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21); + } + else + { + size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? 
INS_srai_d : INS_srai_w, attr, REG_T0, dst->GetRegNum(), imm); + //TODO: FIXME:should confirm reg REG_T0! + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_T0); + } + } + } + } + else if (dst->OperGet() == GT_AND || dst->OperGet() == GT_OR || dst->OperGet() == GT_XOR) + { + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + + //NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. + if (EA_SIZE(attr) == EA_4BYTE) + emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); + } + else + { + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); + regNumber saveOperReg1 = REG_NA; + regNumber saveOperReg2 = REG_NA; + + if ((dst->gtFlags & GTF_UNSIGNED) && (attr == EA_8BYTE)) + { + if (src1->gtType == TYP_INT) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp1, /*src1->GetRegNum(),*/ 31, 0); + regOp1 = REG_RA;//dst->ExtractTempReg(); + } + if (src2->gtType == TYP_INT) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_R21, regOp2, /*src2->GetRegNum(),*/ 31, 0); + regOp2 = REG_R21;//dst->ExtractTempReg(); + } + } + if (needCheckOv) + { + assert(!varTypeIsFloating(dst)); + + assert(REG_R21 != dst->GetRegNum()); + assert(REG_RA != dst->GetRegNum()); + + if (dst->GetRegNum() == regOp1) + { + assert(REG_R21 != regOp1); + assert(REG_RA != regOp1); + saveOperReg1 = REG_R21; + saveOperReg2 = regOp2; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp1, REG_R0); + } + else if (dst->GetRegNum() == regOp2) + { + assert(REG_R21 != regOp2); + assert(REG_RA != regOp2); + saveOperReg1 = regOp1; + saveOperReg2 = REG_R21; + emitIns_R_R_R(INS_or, attr, REG_R21, regOp2, REG_R0); + } + else + { + saveOperReg1 = regOp1; + saveOperReg2 = regOp2; + } + } + + emitIns_R_R_R(ins, attr, dst->GetRegNum(), regOp1, regOp2); + + if (needCheckOv) + { + 
if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) + { + ssize_t imm; + regNumber tempReg1; + regNumber tempReg2; + // ADD : A = B + C + // SUB : C = A - B + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + // if A < B, goto overflow + if (dst->OperGet() == GT_ADD) + { + tempReg1 = dst->GetRegNum(); + tempReg2 = saveOperReg1; + } + else + { + tempReg1 = saveOperReg1; + tempReg2 = saveOperReg2; + } + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2); + } + else + { + tempReg1 = REG_RA; + tempReg2 = dst->GetSingleTempReg(); + assert(tempReg1 != tempReg2); + assert(tempReg1 != saveOperReg1); + assert(tempReg2 != saveOperReg2); + + ssize_t ui6 = (attr == EA_4BYTE) ? 31 : 63; + if (dst->OperGet() == GT_ADD) + emitIns_R_R_I(INS_srli_d, attr, tempReg1, saveOperReg1, ui6); + else + emitIns_R_R_I(INS_srli_d, attr, tempReg1, dst->GetRegNum(), ui6); + emitIns_R_R_I(INS_srli_d, attr, tempReg2, saveOperReg2, ui6); + + emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2); + if (attr == EA_4BYTE) + { + imm = 1; + emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm); + emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); + } + // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); + + emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0); + + emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); + + // B > 0 and C > 0, if A < B, goto overflow + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + + codeGen->genDefineTempLabel(tmpLabel2); + + codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW); + + codeGen->genDefineTempLabel(tmpLabel3); + + // B < 0 and C < 0, if A > B, goto overflow + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? 
saveOperReg1 : saveOperReg2, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + + codeGen->genDefineTempLabel(tmpLabel); + } + } + else + { +#ifdef DEBUG + printf("---------[LOONGARCH64]-NOTE: UnsignedOverflow instruction %d\n", ins); +#endif + assert(!"unimplemented on LOONGARCH yet"); + } + } + } + +L_Done: + + return dst->GetRegNum(); +} + +unsigned emitter::get_curTotalCodeSize() +{ + return emitTotalCodeSize; +} + +#if defined(DEBUG) || defined(LATE_DISASM) + +//---------------------------------------------------------------------------------------- +// getInsExecutionCharacteristics: +// Returns the current instruction execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// +// Return Value: +// A struct containing the current instruction execution characteristics +// +// Notes: +// The instruction latencies and throughput values returned by this function +// are NOT accurate and just a function feature. +emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +{ + insExecutionCharacteristics result; + + //TODO: support this function for LoongArch64. + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; + + return result; +} + +#endif // defined(DEBUG) || defined(LATE_DISASM) + +#ifdef DEBUG +//------------------------------------------------------------------------ +// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. +// +// Arguments: +// reg - A general-purpose register or SIMD and floating-point register. +// size - A register size. +// varName - unused parameter. +// +// Return value: +// A string that represents a general-purpose register name or SIMD and floating-point scalar register name. 
+// +const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) +{ + assert(reg < REG_COUNT); + + const char* rn = nullptr; + + rn = RegNames[reg]; + assert(rn != nullptr); + + return rn; +} +#endif + +//------------------------------------------------------------------------ +// IsMovInstruction: Determines whether a give instruction is a move instruction +// +// Arguments: +// ins -- The instruction being checked +// +bool emitter::IsMovInstruction(instruction ins) +{ + switch (ins) + { + case INS_mov: + case INS_fmov_s: + case INS_fmov_d: + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movfr2gr_s: + case INS_movfr2gr_d: + { + return true; + } + + default: + { + return false; + } + } +} + +//---------------------------------------------------------------------------------------- +// IsRedundantMov: +// Check if the current `mov` instruction is redundant and can be omitted. +// A `mov` is redundant in following 3 cases: +// +// 1. Move to same register +// (Except 4-byte movement like "mov w1, w1" which zeros out upper bits of x1 register) +// +// mov Rx, Rx +// +// 2. Move that is identical to last instruction emitted. +// +// mov Rx, Ry # <-- last instruction +// mov Rx, Ry # <-- current instruction can be omitted. +// +// 3. Opposite Move as that of last instruction emitted. +// +// mov Rx, Ry # <-- last instruction +// mov Ry, Rx # <-- current instruction can be omitted. +// +// Arguments: +// ins - The current instruction +// size - Operand size of current instruction +// dst - The current destination +// src - The current source +// canSkip - The move can be skipped as it doesn't represent special semantics +// +// Return Value: +// true if previous instruction moved from current dst to src. 
+ +bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) +{ + assert(!"unimplemented on LOONGARCH yet"); + return false; +#if 0 + assert(ins == INS_mov); + + if (canSkip && (dst == src)) + { + // These elisions used to be explicit even when optimizations were disabled + return true; + } + + if (!emitComp->opts.OptimizationEnabled()) + { + // The remaining move elisions should only happen if optimizations are enabled + return false; + } + + if (dst == src) + { + // A mov with a EA_4BYTE has the side-effect of clearing the upper bits + // So only eliminate mov instructions that are not clearing the upper bits + // + if (isGeneralRegisterOrSP(dst) && (size == EA_8BYTE)) + { + JITDUMP("\n -- suppressing mov because src and dst is same 8-byte register.\n"); + return true; + } + else if (isVectorRegister(dst) && (size == EA_16BYTE)) + { + JITDUMP("\n -- suppressing mov because src and dst is same 16-byte register.\n"); + return true; + } + } + + bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); + + if (!isFirstInstrInBlock && // Don't optimize if instruction is not the first instruction in IG. + (emitLastIns != nullptr) && + (emitLastIns->idIns() == INS_mov) && // Don't optimize if last instruction was not 'mov'. + (emitLastIns->idOpSize() == size)) // Don't optimize if operand size is different than previous instruction. + { + // Check if we did same move in prev instruction except dst/src were switched. + regNumber prevDst = emitLastIns->idReg1(); + regNumber prevSrc = emitLastIns->idReg2(); + insFormat lastInsfmt = emitLastIns->idInsFmt(); + + // Sometimes emitLastIns can be a mov with single register e.g. "mov reg, #imm". So ensure to + // optimize formats that does vector-to-vector or scalar-to-scalar register movs. 
+ // + const bool isValidLastInsFormats = + ((lastInsfmt == IF_DV_3C) || (lastInsfmt == IF_DR_2G) || (lastInsfmt == IF_DR_2E)); + + if (isValidLastInsFormats && (prevDst == dst) && (prevSrc == src)) + { + assert(emitLastIns->idOpSize() == size); + JITDUMP("\n -- suppressing mov because previous instruction already moved from src to dst register.\n"); + return true; + } + + if ((prevDst == src) && (prevSrc == dst) && isValidLastInsFormats) + { + // For mov with EA_8BYTE, ensure src/dst are both scalar or both vector. + if (size == EA_8BYTE) + { + if (isVectorRegister(src) == isVectorRegister(dst)) + { + JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst " + "to src register.\n"); + return true; + } + } + + // For mov with EA_16BYTE, both src/dst will be vector. + else if (size == EA_16BYTE) + { + assert(isVectorRegister(src) && isVectorRegister(dst)); + assert(lastInsfmt == IF_DV_3C); + + JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst to " + "src register.\n"); + return true; + } + + // For mov of other sizes, don't optimize because it has side-effect of clearing the upper bits. + } + } + + return false; +#endif +} + +//---------------------------------------------------------------------------------------- +// IsRedundantLdStr: +// For ldr/str pair next to each other, check if the current load or store is needed or is +// the value already present as of previous instruction. +// +// ldr x1, [x2, #56] +// str x1, [x2, #56] <-- redundant +// +// OR +// +// str x1, [x2, #56] +// ldr x1, [x2, #56] <-- redundant + +// Arguments: +// ins - The current instruction +// dst - The current destination +// src - The current source +// imm - Immediate offset +// size - Operand size +// fmt - Format of instruction +// Return Value: +// true if previous instruction already has desired value in register/memory location. 
+ +bool emitter::IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) +{ + assert(!"unimplemented on LOONGARCH yet"); + return false; +#if 0 + bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); + + if (((ins != INS_ldr) && (ins != INS_str)) || (isFirstInstrInBlock) || (emitLastIns == nullptr)) + { + return false; + } + + regNumber prevReg1 = emitLastIns->idReg1(); + regNumber prevReg2 = emitLastIns->idReg2(); + insFormat lastInsfmt = emitLastIns->idInsFmt(); + emitAttr prevSize = emitLastIns->idOpSize(); + ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns(); + + // Only optimize if: + // 1. "base" or "base plus immediate offset" addressing modes. + // 2. Addressing mode matches with previous instruction. + // 3. The operand size matches with previous instruction + if (((fmt != IF_LS_2A) && (fmt != IF_LS_2B)) || (fmt != lastInsfmt) || (prevSize != size)) + { + return false; + } + + if ((ins == INS_ldr) && (emitLastIns->idIns() == INS_str)) + { + // If reg1 is of size less than 8-bytes, then eliminating the 'ldr' + // will not zero the upper bits of reg1. + + // Make sure operand size is 8-bytes + // str w0, [x1, #4] + // ldr w0, [x1, #4] <-- can't eliminate because upper-bits of x0 won't get set. + if (size != EA_8BYTE) + { + return false; + } + + if ((prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm)) + { + JITDUMP("\n -- suppressing 'ldr reg%u [reg%u, #%u]' as previous 'str reg%u [reg%u, #%u]' was from same " + "location.\n", + reg1, reg2, imm, prevReg1, prevReg2, prevImm); + return true; + } + } + else if ((ins == INS_str) && (emitLastIns->idIns() == INS_ldr)) + { + // Make sure src and dst registers are not same. + // ldr x0, [x0, #4] + // str x0, [x0, #4] <-- can't eliminate because [x0+3] is not same destination as previous source. 
+ // Note, however, that we can not eliminate store in the following sequence + // ldr wzr, [x0, #4] + // str wzr, [x0, #4] + // since load operation doesn't (and can't) change the value of its destination register. + if ((reg1 != reg2) && (prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm) && (reg1 != REG_ZR)) + { + JITDUMP("\n -- suppressing 'str reg%u [reg%u, #%u]' as previous 'ldr reg%u [reg%u, #%u]' was from same " + "location.\n", + reg1, reg2, imm, prevReg1, prevReg2, prevImm); + return true; + } + } + + return false; +#endif +} +#endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h new file mode 100644 index 0000000000000..50da1b09a0f20 --- /dev/null +++ b/src/coreclr/jit/emitloongarch64.h @@ -0,0 +1,421 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. + +#if defined(TARGET_LOONGARCH64) + +// The LOONGARCH64 instructions are all 32 bits in size. +// we use an unsigned int to hold the encoded instructions. +// This typedef defines the type that we use to hold encoded instructions. +// +typedef unsigned int code_t; + +/************************************************************************/ +/* Routines that compute the size of / encode instructions */ +/************************************************************************/ + +struct CnsVal +{ + ssize_t cnsVal; + bool cnsReloc; +}; + +#ifdef DEBUG + +/************************************************************************/ +/* Debug-only routines to display instructions */ +/************************************************************************/ + +const char* emitFPregName(unsigned reg, bool varName = true); +const char* emitVectorRegName(regNumber reg); + +//NOTE: At least 32bytes within dst. 
+void emitDisInsName(code_t code, const BYTE* dst, instrDesc* id); +#endif // DEBUG + +void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); +void emitIns_I_la(emitAttr attr, regNumber reg, ssize_t imm); + +/************************************************************************/ +/* Private members that deal with target-dependent instr. descriptors */ +/************************************************************************/ + +private: +instrDesc* emitNewInstrCallDir(int argCnt, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +instrDesc* emitNewInstrCallInd(int argCnt, + ssize_t disp, + VARSET_VALARG_TP GCvars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + emitAttr retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + +/************************************************************************/ +/* Private helpers for instruction output */ +/************************************************************************/ + +private: +bool emitInsIsLoad(instruction ins); +bool emitInsIsStore(instruction ins); +bool emitInsIsLoadOrStore(instruction ins); + +emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); + +// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] +void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); + +// Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer +unsigned emitOutput_Instr(BYTE* dst, code_t code); + +//NOTEADD: New functions in emitarm64.h +// Method to do check if mov is redundant with respect to the last instruction. +// If yes, the caller of this method can choose to omit current mov instruction. 
+static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); +bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);//New functions end. + +/************************************************************************ +* +* This union is used to to encode/decode the special LOONGARCH64 immediate values +* that is listed as imm(N,r,s) and referred to as 'bitmask immediate' +*/ + +union bitMaskImm { + struct + { + unsigned immS : 6; // bits 0..5 + unsigned immR : 6; // bits 6..11 + unsigned immN : 1; // bits 12 + }; + unsigned immNRS; // concat N:R:S forming a 13-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a 64-bit immediate and its 'bitmask immediate' +* representation imm(i16,hw) +*/ + +//static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); + +//static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); + +/************************************************************************ +* +* This union is used to to encode/decode the special LOONGARCH64 immediate values +* that is listed as imm(i16,hw) and referred to as 'halfword immediate' +*/ + +union halfwordImm { + struct + { + unsigned immVal : 16; // bits 0..15 + unsigned immHW : 2; // bits 16..17 + }; + unsigned immHWVal; // concat HW:Val forming a 18-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a 64-bit immediate and its 'halfword immediate' +* representation imm(i16,hw) +*/ + +//static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); + +//static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); + +/************************************************************************ +* +* This union is used to encode/decode the 
special LOONGARCH64 immediate values +* that is listed as imm(i16,by) and referred to as 'byteShifted immediate' +*/ + +union byteShiftedImm { + struct + { + unsigned immVal : 8; // bits 0..7 + unsigned immBY : 2; // bits 8..9 + unsigned immOnes : 1; // bit 10 + }; + unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a 16/32-bit immediate and its 'byteShifted immediate' +* representation imm(i8,by) +*/ + +//static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); + +//static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); + +/************************************************************************ +* +* This union is used to to encode/decode the special LOONGARCH64 immediate values +* that are use for FMOV immediate and referred to as 'float 8-bit immediate' +*/ + +union floatImm8 { + struct + { + unsigned immMant : 4; // bits 0..3 + unsigned immExp : 3; // bits 4..6 + unsigned immSign : 1; // bits 7 + }; + unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate +}; + +/************************************************************************ +* +* Convert between a double and its 'float 8-bit immediate' representation +*/ + +//static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); + +//static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); + +/************************************************************************ +* +* This union is used to to encode/decode the cond, nzcv and imm5 values for +* instructions that use them in the small constant immediate field +*/ + +union condFlagsImm { + struct + { + //insCond cond : 4; // bits 0..3 + //insCflags flags : 4; // bits 4..7 + unsigned imm5 : 5; // bits 8..12 + }; + unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate +}; + +// Returns true if 
'reg' represents an integer register. +static bool isIntegerRegister(regNumber reg) +{ + return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +// Returns true if 'value' is a legal signed immediate 12 bit encoding. +static bool isValidSimm12(ssize_t value) +{ + return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); +}; + +// Returns true if 'value' is a legal signed immediate 16 bit encoding. +static bool isValidSimm16(ssize_t value) +{ + return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); +}; + +// Returns true if 'value' is a legal signed immediate 20 bit encoding. +static bool isValidSimm20(ssize_t value) +{ + return -( ((int)1) << 19 ) <= value && value < ( ((int)1) << 19 ); +}; + +/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: + +// Returns the number of bits used by the given 'size'. +inline static unsigned getBitWidth(emitAttr size) +{ + assert(size <= EA_8BYTE); + return (unsigned)size * BITS_PER_BYTE; +} + +inline static bool isGeneralRegister(regNumber reg) +{ + // Excludes REG_R0 ?? 
+ return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); +} + +inline static bool isGeneralRegisterOrR0(regNumber reg) +{ + return (reg >= REG_FIRST) && (reg <= REG_INT_LAST); +} // Includes REG_R0 + +inline static bool isFloatReg(regNumber reg) +{ + return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST); +} + +/************************************************************************/ +/* The public entry points to output instructions */ +/************************************************************************/ + +public: +void emitIns(instruction ins); + +void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); + +void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); +void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); + +void emitIns_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, ssize_t hint, ssize_t off, insOpts opt = INS_OPTS_NONE); + +void emitIns_R(instruction ins, emitAttr attr, regNumber reg); + +void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +//NOTEADD: NEW function in emitarm64. 
+void emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags) +{ + emitIns_R_R(ins, attr, reg1, reg2); +} + +void emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE); + +// Checks for a large immediate that needs a second instruction +void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm); + +void emitIns_R_R_R( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + +void emitIns_R_R_R_Ext(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts opt = INS_OPTS_NONE, + int shiftAmount = -1); + +//NODECHANGE: ADD an arg. 
+void emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); + +void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); + +//void emitIns_BARR(instruction ins, insBarrier barrier); + +void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); + +void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); + +void emitIns_S_S_R_R( + instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); + +//void emitIns_R_R_S( +// instruction ins, emitAttr attr, regNumber ireg, regNumber ireg2, int sa); + +void emitIns_R_R_S_S( + instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); + +void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val); + +void emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); + +void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); + +void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); + +void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, ssize_t offs, ssize_t val); + +void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg); + +void emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int instrCount); + +void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int offs); + +void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +//NODECHANGE: ADD a description of arguments "disp" +void emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t disp DEBUGARG(size_t targetHandle = 0) 
DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); + + +void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); + +void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); + +void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); + +void emitIns_R_ARX( + instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp); + +enum EmitCallType +{ + + // I have included here, but commented out, all the values used by the x86 emitter. + // However, LOONGARCH has a much reduced instruction set, and so the LOONGARCH emitter only + // supports a subset of the x86 variants. By leaving them commented out, it becomes + // a compile time error if code tries to use them (and hopefully see this comment + // and know why they are unavailable on LOONGARCH), while making it easier to stay + // in-sync with x86 and possibly add them back in if needed. 
+ + EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method + // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method + //EC_FUNC_ADDR, // Direct call to an absolute address + + // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) + EC_INDIR_R, // Indirect call via register + // EC_INDIR_SR, // Indirect call via stack-reference (local var) + // EC_INDIR_C, // Indirect call via static class var + // EC_INDIR_ARD, // Indirect call via an addressing mode + + EC_COUNT +}; + +void emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di, + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); + +unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); +//BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); +//BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); +//BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); +//BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); +//BYTE* emitOutputShortConstant( +// BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize); + +unsigned get_curTotalCodeSize(); // bytes of code + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/emitpub.h b/src/coreclr/jit/emitpub.h index 4982104acc749..02ab3bb879d6f 100644 --- a/src/coreclr/jit/emitpub.h +++ b/src/coreclr/jit/emitpub.h @@ -139,7 +139,7 @@ static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADE /* Interface for generating 
unwind information */ /************************************************************************/ -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL); @@ -151,7 +151,7 @@ void emitSplit(emitLocation* startLoc, void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || defined(TARGET_LOONGARCH64) #if defined(TARGET_ARM) diff --git a/src/coreclr/jit/error.h b/src/coreclr/jit/error.h index 126a8665a34e8..fdd75fed5f535 100644 --- a/src/coreclr/jit/error.h +++ b/src/coreclr/jit/error.h @@ -174,6 +174,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_X86) @@ -181,6 +182,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) NYIRAW("NYI_X86: " msg) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM) @@ -188,6 +190,7 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg) #define NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) do { } while (0) #elif defined(TARGET_ARM64) @@ -195,10 +198,18 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); #define NYI_X86(msg) do { } while (0) #define NYI_ARM(msg) do { } while (0) #define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg) +#define NYI_LOONGARCH64(msg) do { } while (0) + +#elif defined(TARGET_LOONGARCH64) +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define 
NYI_ARM64(msg) do { } while (0) +#define NYI_LOONGARCH64(msg) NYIRAW("NYI_LOONGARCH64: " msg) #else -#error "Unknown platform, not x86, ARM, or AMD64?" +#error "Unknown platform, not x86, ARM, LOONGARCH64 or AMD64?" #endif diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4b14dc38522be..622c3e8d1b640 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3042,6 +3042,27 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostSz += idx->GetCostSz(); } + if (cns != 0) + { + if (cns >= (4096 * genTypeSize(type))) + { + *pCostEx += 1; + *pCostSz += 4; + } + } +#elif defined(TARGET_LOONGARCH64) + if (base) + { + *pCostEx += base->GetCostEx(); + *pCostSz += base->GetCostSz(); + } + + if (idx) + { + *pCostEx += idx->GetCostEx(); + *pCostSz += idx->GetCostSz(); + } + // TODO: workround, should amend for LoongArch64. if (cns != 0) { if (cns >= (4096 * genTypeSize(type))) @@ -3464,13 +3485,20 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } goto COMMON_CNS; +#elif defined(TARGET_LOONGARCH64) + case GT_CNS_STR: + case GT_CNS_LNG: + case GT_CNS_INT: + // TODO: workround, should amend for LoongArch64. + costEx = 4; + costSz = 4; + goto COMMON_CNS; #else case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: #error "Unknown TARGET" #endif - COMMON_CNS: /* Note that some code below depends on constants always getting @@ -3526,6 +3554,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX; costSz = 4; } +#elif defined(TARGET_LOONGARCH64) + // TODO: workround, should amend for LoongArch64. + costEx = 2; + costSz = 8; #else #error "Unknown TARGET" #endif @@ -3699,6 +3731,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx = IND_COST_EX * 2; costSz = 6; } +#elif defined(TARGET_LOONGARCH64) + // TODO: workround, should amend for LoongArch64. 
+ costEx = 1; + costSz = 2; + if (isflt || varTypeIsFloating(op1->TypeGet())) + { + costEx = 2; + costSz = 4; + } #else #error "Unknown TARGET" #endif @@ -5909,6 +5950,9 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) switch (type) { case TYP_INT: +#ifdef TARGET_LOONGARCH64 + case TYP_UINT: +#endif zero = gtNewIconNode(0); break; @@ -6703,7 +6747,7 @@ bool GenTreeOp::UsesDivideByConstOptimized(Compiler* comp) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && ((divisorValue >= 3) || !isSignedDivide)) { // All checks pass we can perform the division operation using a reciprocal multiply. @@ -13569,8 +13613,11 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: +#ifdef TARGET_LOONGARCH64 + assert(tree->TypeIs(TYP_INT) || tree->TypeIs(TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); +#else assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); - +#endif // No GC pointer types should be folded here... assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet())); @@ -21353,6 +21400,42 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, m_regType[i] = comp->getJitGCType(gcPtrs[i]); } +#elif defined(TARGET_LOONGARCH64) + assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); + + DWORD numFloatFields = comp->info.compCompHnd->getFieldTypeByHnd(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); + + if (numFloatFields & 0x8) + { + assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? 
TYP_DOUBLE : TYP_FLOAT; + comp->compFloatingPointUsed = true; + } + else if (numFloatFields & 0x2) + { + assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + comp->compFloatingPointUsed = true; + } + else if (numFloatFields & 0x4) + { + assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); + m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + comp->compFloatingPointUsed = true; + } + else + { + for (unsigned i = 0; i < 2; ++i) + { + m_regType[i] = comp->getJitGCType(gcPtrs[i]); + } + } + #elif defined(TARGET_X86) // an 8-byte struct returned using two registers @@ -21543,6 +21626,21 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3 } +#elif defined(TARGET_LOONGARCH64) + var_types regType = GetReturnRegType(idx); + if (idx == 0) + { + resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // V0 or F0 + } + else + { + noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs + if (varTypeIsIntegralOrI(regType)) + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // V0 or V1 + else //if (!varTypeIsIntegralOrI(regType)) + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? 
REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 + } + #endif // TARGET_XXX assert(resultReg != REG_NA); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 3fd3792eb8b1f..190a280cad12f 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4487,12 +4487,17 @@ struct GenTreeCall final : public GenTree } #endif +#if defined(TARGET_LOONGARCH64) + return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); +#else if (!varTypeIsStruct(gtType) || HasRetBufArg()) { return false; } // Now it is a struct that is returned in registers. return GetReturnTypeDesc()->IsMultiRegRetType(); +#endif + #else // !FEATURE_MULTIREG_RET return false; #endif // !FEATURE_MULTIREG_RET diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 3519ac6f7b650..9d4b4131e5d54 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8513,7 +8513,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni return true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Jit64 compat: if (callerRetType == TYP_VOID) { @@ -10335,7 +10335,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#elif FEATURE_MULTIREG_RET && defined(TARGET_ARM64) +#elif FEATURE_MULTIREG_RET && (defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // Is method returning a multi-reg struct? if (IsMultiRegReturnedType(retClsHnd, unmgdCallConv)) @@ -11388,12 +11388,32 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr if (genActualType(op1->TypeGet()) != TYP_I_IMPL) { // insert an explicit upcast +#ifdef TARGET_LOONGARCH64 + if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) + { + op1->AsIntCon()->gtIconVal = fUnsigned ? 
(uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_LONG; + } + else if (op1->gtOper == GT_CNS_INT) + *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#else op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#endif } else if (genActualType(op2->TypeGet()) != TYP_I_IMPL) { // insert an explicit upcast +#ifdef TARGET_LOONGARCH64 + if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) + { + op2->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; + op2->gtType = TYP_LONG; + } + else + *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#else op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); +#endif } type = TYP_I_IMPL; @@ -12445,7 +12465,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif // FEATURE_SIMD +#ifdef TARGET_LOONGARCH64 + if (prevOpcode == CEE_LDC_I4_0 || prevOpcode == CEE_LDNULL) + { + op1->gtType = lclTyp; + op1->gtFlags |= GTF_CONTAINED; + } + else + op1 = impImplicitIorI4Cast(op1, lclTyp); +#else op1 = impImplicitIorI4Cast(op1, lclTyp); +#endif #ifdef TARGET_64BIT // Downcast the TYP_I_IMPL into a 32-bit Int for x86 JIT compatiblity @@ -13529,7 +13559,15 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; // operand to be shifted impBashVarAddrsToI(op1, op2); +#ifdef TARGET_LOONGARCH64 + if (op2->gtOper == GT_CNS_INT && op2->AsIntCon()->gtIconVal > 31) + { + type = TYP_LONG; + } else + type = genActualType(op1->TypeGet()); +#else type = genActualType(op1->TypeGet()); +#endif op1 = gtNewOperNode(oper, type, op1, op2); impPushOnStack(op1, tiRetVal); @@ -13760,10 +13798,26 @@ void Compiler::impImportBlockCode(BasicBlock* block) // See also identical code in impGetByRefResultType and STSFLD import. 
if (varTypeIsI(op1) && (genActualType(op2) == TYP_INT)) { +#ifdef TARGET_LOONGARCH64 + if (op2->gtOper == GT_CNS_INT) + { + op2->AsIntCon()->gtIconVal = uns ? (uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; + op2->gtType = TYP_LONG; + } + else +#endif op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); } else if (varTypeIsI(op2) && (genActualType(op1) == TYP_INT)) { +#ifdef TARGET_LOONGARCH64 + if (op1->gtOper == GT_CNS_INT) + { + op1->AsIntCon()->gtIconVal = uns ? (uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_LONG; + } + else +#endif op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -13857,6 +13911,18 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; #ifdef TARGET_64BIT +#ifdef TARGET_LOONGARCH64 + if ((op2->OperGet() == GT_CNS_INT)/* && (op2->AsIntCon()->IconValue() == 0)*/) + { + op2->gtType = op1->TypeGet(); + } + /*if (op1->OperGet() == GT_CNS_INT) + { + //assert(op1->gtType == op2->TypeGet()); + //op2->gtType = op1->TypeGet(); + op1->gtFlags |= GTF_CONTAINED; + }*/ +#else if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT)) { op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, uns ? TYP_U_IMPL : TYP_I_IMPL); @@ -13865,11 +13931,20 @@ void Compiler::impImportBlockCode(BasicBlock* block) { op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, uns ? 
TYP_U_IMPL : TYP_I_IMPL); } +#endif #endif // TARGET_64BIT +#ifdef TARGET_LOONGARCH64 + assertImp((genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op1->TypeGet()) == TYP_INT) || + (genActualType(op2->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_INT) || + genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || + varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) || + varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType)); +#else assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); +#endif if (opts.OptimizationEnabled() && (block->bbJumpDest == block->bbNext)) { @@ -14148,6 +14223,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; +#ifdef TARGET_LOONGARCH64 + if (!callNode && prevOpcode == CEE_LDC_I4_0) + { + assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); + op1->gtType = genActualType(lclTyp); + impPushOnStack(op1, tiRetVal); + //opcode = CEE_LDC_I4_0; + break; + } +#endif + impBashVarAddrsToI(op1); // Casts from floating point types must not have GTF_UNSIGNED set. @@ -14158,6 +14244,34 @@ void Compiler::impImportBlockCode(BasicBlock* block) // At this point uns, ovf, callNode are all set. 
+ +#ifdef TARGET_LOONGARCH64 + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) + { + switch (lclTyp) + { + case TYP_BYTE: + op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_UBYTE: + op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_USHORT: + op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_SHORT: + op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; + break; + default: + assert(!"unexpected type"); + return; + } + + op1->gtType = TYP_INT; + + impPushOnStack(op1, tiRetVal); + break; + } else +#endif if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) { op2 = op1->AsOp()->gtOp2; @@ -14222,6 +14336,32 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = gtNewCastNodeL(type, op1, uns, lclTyp); } else +#ifdef TARGET_LOONGARCH64 + if (type != TYP_LONG) + { + if (!ovfl && op1->gtOper == GT_CNS_INT && op1->TypeGet() == TYP_LONG) + { + assert(lclTyp == TYP_INT || lclTyp == TYP_UINT); + if (lclTyp == TYP_INT) + { + op1->AsIntCon()->gtIconVal = (int32_t)op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_INT; + } + else if (lclTyp == TYP_UINT) + { + op1->AsIntCon()->gtIconVal = (uint32_t)op1->AsIntCon()->gtIconVal; + op1->gtType = TYP_UINT; + } + else + op1 = gtNewCastNode(type, op1, uns, lclTyp); + } + else + { + op1 = gtNewCastNode(type, op1, uns, lclTyp); + } + } + else +#endif { op1 = gtNewCastNode(type, op1, uns, lclTyp); } @@ -14231,11 +14371,13 @@ op1->gtFlags |= (GTF_OVERFLOW | GTF_EXCEPT); } +#ifndef TARGET_LOONGARCH64 if (op1->gtGetOp1()->OperIsConst() && opts.OptimizationEnabled()) { // Try and fold the introduced cast op1 = gtFoldExprConst(op1); } +#endif } impPushOnStack(op1, tiRetVal); @@ -16051,6 +16193,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) op2->gtType = TYP_I_IMPL; } else +#ifdef TARGET_LOONGARCH64 + if 
(genActualType(op2->TypeGet()) != TYP_INT) +#endif { bool isUnsigned = false; op2 = gtNewCastNode(TYP_I_IMPL, op2, isUnsigned, TYP_I_IMPL); @@ -17644,6 +17789,9 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op2 = impImplicitR4orR8Cast(op2, info.compRetType); // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || +#ifdef TARGET_LOONGARCH64 + genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType) || +#endif ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || @@ -17872,7 +18020,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) } } else -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ReturnTypeDesc retTypeDesc; retTypeDesc.InitializeStructReturnType(this, retClsHnd, info.compCallConv); unsigned retRegCount = retTypeDesc.GetReturnRegCount(); @@ -18515,6 +18663,14 @@ void Compiler::impImportBlock(BasicBlock* block) { // Spill clique has decided this should be "native int", but this block only pushes an "int". // Insert a sign-extension to "native int" so we match the clique. +#ifdef TARGET_LOONGARCH64 + if (tree->gtOper == GT_CNS_INT/* && !tree->AsIntCon()->gtIconVal*/) + { + tree->gtType = TYP_I_IMPL; + tree->SetContained(); + } + else +#endif verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } @@ -18539,6 +18695,14 @@ void Compiler::impImportBlock(BasicBlock* block) { // Spill clique has decided this should be "byref", but this block only pushes an "int". // Insert a sign-extension to "native int" so we match the clique size. 
+#ifdef TARGET_LOONGARCH64 + if (tree->gtOper == GT_CNS_INT /*&& !tree->AsIntCon()->gtIconVal*/) + { + tree->gtType = TYP_I_IMPL; + tree->SetContained(); + } + else +#endif verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } } @@ -21351,6 +21515,14 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) case NI_System_Math_Sqrt: return true; + default: + return false; + } +#elif defined(TARGET_LOONGARCH64) + switch (intrinsicName) + { + // LOONGARCH64: will amend in the future + default: return false; } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index bbf204c74caa8..62b2eade19cec 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -66,6 +66,10 @@ const char* CodeGen::genInsName(instruction ins) #define INST9(id, nm, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm, #include "instrs.h" +#elif defined(TARGET_LOONGARCH64) + #define INSTS(id, nm, fp, ldst, fmt, e1) nm, + #include "instrs.h" + #else #error "Unknown TARGET" #endif @@ -434,6 +438,21 @@ void CodeGen::inst_Mov(var_types dstType, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */) { +#ifdef TARGET_LOONGARCH64 + if (isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) + { + if (dstType == TYP_FLOAT) + dstType = TYP_INT; + else if (dstType == TYP_DOUBLE) + dstType = TYP_LONG; + else if (dstType == TYP_INT) + dstType = TYP_FLOAT; + else if (dstType == TYP_LONG) + dstType = TYP_DOUBLE; + else + assert(!"unimplemented on LOONGARCH yet"); + } +#endif instruction ins = ins_Copy(srcReg, dstType); if (size == EA_UNKNOWN) @@ -523,6 +542,8 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #elif defined(TARGET_XARCH) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else @@ -599,6 +620,8 @@ void CodeGen::inst_RV_IV( assert(ins != 
INS_tst); assert(ins != INS_mov); GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(ins, size, reg, reg, val); #else // !TARGET_ARM #ifdef TARGET_AMD64 // Instead of an 8-byte immediate load, a 4-byte immediate will do fine @@ -878,6 +901,15 @@ void CodeGen::inst_RV_TT(instruction ins, return; } #else // !TARGET_ARM +#ifdef TARGET_LOONGARCH64 + if (emitter::isFloatReg(reg)) + assert((ins==INS_fld_d) || (ins==INS_fld_s)); + else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) + {//TODO should amend for LOONGARCH64 !!! + //assert((ins==INS_ld_d) || (ins==INS_ld_w)); + ins = size == EA_4BYTE ? INS_ld_w : INS_ld_d; + } +#endif GetEmitter()->emitIns_R_S(ins, size, reg, varNum, offs); return; #endif // !TARGET_ARM @@ -1442,6 +1474,13 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) } #endif // TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) +bool CodeGenInterface::validImmForBAL(ssize_t addr) +{//TODO: can amend/optimize for LoongArch64. + return false; +} +#endif // TARGET_LOONGARCH64 + /***************************************************************************** * * Get the machine dependent instruction for performing sign/zero extension. @@ -1452,6 +1491,10 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) */ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { +#ifdef TARGET_LOONGARCH64 + assert(!"unimplemented yet on LoongArch64 for unused."); +#endif + instruction ins = INS_invalid; if (varTypeIsSIMD(srcType)) @@ -1633,6 +1676,8 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } #elif defined(TARGET_ARM64) return INS_ldr; +//#elif defined(TARGET_LOONGARCH64) +// //TODO: add SIMD for LoongArch64. 
#else assert(!"ins_Load with SIMD type"); #endif @@ -1657,6 +1702,19 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* return INS_ldr; #elif defined(TARGET_ARM) return INS_vldr; +#elif defined(TARGET_LOONGARCH64) + if (srcType == TYP_DOUBLE) + { + return INS_fld_d; + } + else if (srcType == TYP_FLOAT) + { + return INS_fld_s; + } + else + { + assert(!"unhandled floating type"); + } #else assert(!varTypeIsFloating(srcType)); #endif @@ -1695,6 +1753,34 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* else ins = INS_ldrsh; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_bu; + else + ins = INS_ld_b; + } + else if (varTypeIsShort(srcType)) + { + if (varTypeIsUnsigned(srcType)) + ins = INS_ld_hu; + else + ins = INS_ld_h; + } + else if (TYP_INT == srcType) + { + ins = INS_ld_w; + } + else if (TYP_UINT == srcType) + { + ins = INS_ld_wu; + } + else + { + //assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); + ins = INS_ld_d;//default ld_d. + } #else NYI("ins_Load"); #endif @@ -1746,6 +1832,15 @@ instruction CodeGen::ins_Copy(var_types dstType) { return INS_mov; } +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(dstType)) + { + return dstType == TYP_FLOAT ? INS_fmov_s : INS_fmov_d; + } + else + { + return INS_mov; + } #else // TARGET_* #error "Unknown TARGET_" #endif @@ -1797,6 +1892,19 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) assert(dstType == TYP_INT); return INS_vmov_f2i; } +#elif defined(TARGET_LOONGARCH64) + // No SIMD support yet. + assert(!varTypeIsSIMD(dstType)); + if (dstIsFloatReg) + { + assert(!genIsValidFloatReg(srcReg)); + return dstType == TYP_FLOAT ? INS_movgr2fr_w : INS_movgr2fr_d; + } + else + { + assert(genIsValidFloatReg(srcReg)); + return EA_SIZE(emitActualTypeSize(dstType)) == EA_4BYTE ? 
INS_movfr2gr_s : INS_movfr2gr_d; + } #else // TARGET* #error "Unknown TARGET" #endif @@ -1863,6 +1971,19 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false { return INS_vstr; } +#elif defined(TARGET_LOONGARCH64) + assert(!varTypeIsSIMD(dstType)); + if (varTypeIsFloating(dstType)) + { + if (dstType == TYP_DOUBLE) + { + return INS_fst_d; + } + else if (dstType == TYP_FLOAT) + { + return INS_fst_s; + } + } #else assert(!varTypeIsSIMD(dstType)); assert(!varTypeIsFloating(dstType)); @@ -1877,6 +1998,15 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_strb; else if (varTypeIsShort(dstType)) ins = INS_strh; +#elif defined(TARGET_LOONGARCH64) + if (varTypeIsByte(dstType)) + ins = INS_st_b; + else if (varTypeIsShort(dstType)) + ins = INS_st_h; + else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) + ins = INS_st_w; + else //if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + ins = INS_st_d;//default st_d. #else NYI("ins_Store"); #endif @@ -2152,6 +2282,8 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla GetEmitter()->emitIns_R_R(INS_xor, size, reg, reg); #elif defined(TARGET_ARMARCH) GetEmitter()->emitIns_R_I(INS_mov, size, reg, 0 ARM_ARG(flags)); +#elif defined(TARGET_LOONGARCH64) + GetEmitter()->emitIns_R_R_I(INS_ori, size, reg, REG_R0, 0); #else #error "Unknown TARGET" #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 1e9302cf503e8..dc0f0a3c925b4 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -6,7 +6,11 @@ #define _INSTR_H_ /*****************************************************************************/ +#ifdef TARGET_LOONGARCH64 +#define BAD_CODE 0XFFFFFFFF +#else #define BAD_CODE 0x0BADC0DE // better not match a real encoding! 
+#endif /*****************************************************************************/ @@ -47,6 +51,14 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals +#elif defined(TARGET_LOONGARCH64) + #define INSTS(id, nm, fp, ldst, fmt, e1) INS_##id, + #include "instrs.h" + + //INS_dneg, // Not a real instruction. It will be translated to dsubu. + //INS_neg, // Not a real instruction. It will be translated to subu. + //INS_not, // Not a real instruction. It will be translated to nor. + INS_lea, // Not a real instruction. It is used for load the address of stack locals #else #error Unsupported target architecture #endif @@ -144,6 +156,13 @@ enum insFlags: unsigned INS_FLAGS_SET = 0x01, INS_FLAGS_DONT_CARE = 0x02, }; +#elif defined(TARGET_LOONGARCH64) +enum insFlags: unsigned +{ + INS_FLAGS_NOT_SET = 0x00, + INS_FLAGS_SET = 0x01, + INS_FLAGS_DONT_CARE = 0x02, +}; #else #error Unsupported target architecture #endif @@ -275,7 +294,6 @@ enum insBarrier : unsigned INS_BARRIER_OSHLD = 1, INS_BARRIER_OSHST = 2, INS_BARRIER_OSH = 3, - INS_BARRIER_NSHLD = 5, INS_BARRIER_NSHST = 6, INS_BARRIER_NSH = 7, @@ -288,6 +306,33 @@ enum insBarrier : unsigned INS_BARRIER_ST = 14, INS_BARRIER_SY = 15, }; +#elif defined(TARGET_LOONGARCH64) +enum insOpts : unsigned +{ + INS_OPTS_NONE, + + INS_OPTS_RC, // see ::emitIns_R_C(). + INS_OPTS_RL, // see ::emitIns_R_L(). + INS_OPTS_JIRL, // see ::emitIns_J_R(). + INS_OPTS_J, // see ::emitIns_J(). + INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). + INS_OPTS_I, // see ::emitIns_I_la(). + //INS_OPTS_J2, // see ::emitIns_J(). + INS_OPTS_C, // see ::emitIns_Call(). + INS_OPTS_RELOC, // see ::emitIns_R_AI(). + //INS_OPTS_, // see ::(). + //INS_OPTS_, // see ::(). 
+};
+
+enum insBarrier : unsigned
+{
+    INS_BARRIER_FULL = 0,
+    INS_BARRIER_WMB = INS_BARRIER_FULL,//4,
+    INS_BARRIER_MB = INS_BARRIER_FULL,//16,
+    INS_BARRIER_ACQ = INS_BARRIER_FULL,//17,
+    INS_BARRIER_REL = INS_BARRIER_FULL,//18,
+    INS_BARRIER_RMB = INS_BARRIER_FULL,//19,
+};
 #endif
 
 #undef EA_UNKNOWN
diff --git a/src/coreclr/jit/instrs.h b/src/coreclr/jit/instrs.h
index b543f781645f5..aa16547f44be7 100644
--- a/src/coreclr/jit/instrs.h
+++ b/src/coreclr/jit/instrs.h
@@ -7,6 +7,8 @@
 #include "instrsarm.h"
 #elif defined(TARGET_ARM64)
 #include "instrsarm64.h"
+#elif defined(TARGET_LOONGARCH64)
+#include "instrsloongarch64.h"
 #else
 #error Unsupported or unset target architecture
 #endif // target type
diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h
new file mode 100644
index 0000000000000..1c16d53fd453a
--- /dev/null
+++ b/src/coreclr/jit/instrsloongarch64.h
@@ -0,0 +1,499 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Copyright (c) Loongson Technology. All rights reserved.
+
+/*****************************************************************************
+ * Loongarch64 instructions for JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- floating point instruction
+ * ld/st/cmp -- load/store/compare instruction
+ * fmt -- encoding format used by this instruction
+ * e1 -- encoding 1
+ * e2 -- encoding 2
+ * e3 -- encoding 3
+ * e4 -- encoding 4
+ * e5 -- encoding 5
+ *
+******************************************************************************/
+
+#if !defined(TARGET_LOONGARCH64)
+#error Unexpected target type
+#endif
+
+#ifndef INSTS
+#error INSTS must be defined before including this file.
+#endif
+
+/*****************************************************************************/
+/* The following is LOONGARCH64-specific */
+/*****************************************************************************/
+
+// If you're adding a new instruction:
+// You need not only to fill in one of these macros describing the instruction, but also:
+// * If the instruction writes to more than one destination register, update the function
+// emitInsMayWriteMultipleRegs in emitLoongarch64.cpp.
+
+// clang-format off
+INSTS(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE)
+
+
+INSTS(nop , "nop", 0, 0, IF_LA, 0x03400000)
+
+////INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number.
+INSTS(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000)
+INSTS(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100)
+
+INSTS(beq, "beq", 0, 0, IF_LA, 0x58000000)
+INSTS(bne, "bne", 0, 0, IF_LA, 0x5c000000)
+
+INSTS(blt, "blt", 0, 0, IF_LA, 0x60000000)
+INSTS(bge, "bge", 0, 0, IF_LA, 0x64000000)
+INSTS(bltu, "bltu", 0, 0, IF_LA, 0x68000000)
+INSTS(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000)
+
+////R_I.
+INSTS(beqz, "beqz", 0, 0, IF_LA, 0x40000000)
+INSTS(bnez, "bnez", 0, 0, IF_LA, 0x44000000)
+
+////I.
+INSTS(b, "b", 0, 0, IF_LA, 0x50000000)
+INSTS(bl, "bl", 0, 0, IF_LA, 0x54000000)
+
+////////////////////////////////////////////////
+////NOTE: Begin
+//// the following instructions will be used by emitter::emitInsMayWriteToGCReg().
+////////////////////////////////////////////////
+// enum name FP LD/ST FMT ENCODE
+
+////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg().
+INSTS(mov, "mov", 0, 0, IF_LA, 0x03800000)
+ // mov rd,rj
+ //NOTE: On loongarch, usually its name is move, but here we use mov for compatibility.
+ // In fact, mov is an alias command, "ori rd,rj,0"
+INSTS(dneg, "dneg", 0, 0, IF_LA, 0x00118000)
+ //dneg is an alias instruction.
+ //sub_d rd, zero, rk
+INSTS(neg, "neg", 0, 0, IF_LA, 0x00110000)
+ //neg is an alias instruction.
+ //sub_w rd, zero, rk +INSTS(not, "not", 0, 0, IF_LA, 0x00140000) + //not is a alias instruction. + //nor rd, rj, zero + +// enum:id name FP LD/ST Formate ENCODE +////R_R_R. +INSTS(add_w, "add.w", 0, 0, IF_LA, 0x00100000) +INSTS(add_d, "add.d", 0, 0, IF_LA, 0x00108000) +INSTS(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) +INSTS(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) + +INSTS(and, "and", 0, 0, IF_LA, 0x00148000) +INSTS(or, "or", 0, 0, IF_LA, 0x00150000) +INSTS(nor, "nor", 0, 0, IF_LA, 0x00140000) +INSTS(xor, "xor", 0, 0, IF_LA, 0x00158000) +INSTS(andn, "andn", 0, 0, IF_LA, 0x00168000) +INSTS(orn, "orn", 0, 0, IF_LA, 0x00160000) + +INSTS(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) +INSTS(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) +INSTS(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) +INSTS(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) +INSTS(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) +INSTS(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) +INSTS(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) +INSTS(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) +INSTS(div_w, "div.w", 0, 0, IF_LA, 0x00200000) +INSTS(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) +INSTS(div_d, "div.d", 0, 0, IF_LA, 0x00220000) +INSTS(div_du, "div.du", 0, 0, IF_LA, 0x00230000) +INSTS(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) +INSTS(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) +INSTS(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) +INSTS(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) + +INSTS(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) +INSTS(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) +INSTS(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) +INSTS(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) +INSTS(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) +INSTS(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) +INSTS(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) +INSTS(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) + +INSTS(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) +INSTS(masknez, "masknez", 0, 0, IF_LA, 0x00138000) + +INSTS(slt, "slt", 0, 0, IF_LA, 0x00120000) +INSTS(sltu, "sltu", 
0, 0, IF_LA, 0x00128000) + +INSTS(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) +INSTS(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) +INSTS(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) +INSTS(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) +INSTS(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) +INSTS(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) +INSTS(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) +INSTS(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) +INSTS(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) +INSTS(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) +INSTS(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) +INSTS(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) +INSTS(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) +INSTS(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) +INSTS(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) +INSTS(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) +INSTS(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) +INSTS(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) +INSTS(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) +INSTS(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) +INSTS(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) +INSTS(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) +INSTS(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) +INSTS(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) +INSTS(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) +INSTS(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) +INSTS(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) +INSTS(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) +INSTS(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) +INSTS(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) +INSTS(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) +INSTS(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) +INSTS(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) +INSTS(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) +INSTS(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) +INSTS(ammin_db_du, "ammin_db.du", 
0, 0, IF_LA, 0x38718000) + +INSTS(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) +INSTS(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) +INSTS(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) +INSTS(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) +INSTS(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) +INSTS(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) +INSTS(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) +INSTS(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) + +////R_R_R_I. +INSTS(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) +INSTS(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) +INSTS(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) + +INSTS(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) +INSTS(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) + +INSTS(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) + +////R_I. +INSTS(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) +INSTS(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) + +INSTS(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) +INSTS(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) +INSTS(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) +INSTS(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) + +////R_R. 
+INSTS(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) +INSTS(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) +INSTS(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) +INSTS(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) +INSTS(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) +INSTS(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) +INSTS(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) +INSTS(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) +INSTS(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) +INSTS(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) +INSTS(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) +INSTS(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) +INSTS(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) +INSTS(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) +INSTS(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) +INSTS(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) +INSTS(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) +INSTS(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) +INSTS(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) +INSTS(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) +INSTS(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) +INSTS(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) +INSTS(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) +INSTS(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) + +////R_R_I_I. +INSTS(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) +INSTS(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) +INSTS(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) +INSTS(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) + +////Load. 
+INSTS(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) +INSTS(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) +INSTS(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) +INSTS(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) +INSTS(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) +INSTS(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) +INSTS(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) + +INSTS(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) +INSTS(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) +INSTS(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) +INSTS(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) + +INSTS(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) +INSTS(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) +INSTS(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) +INSTS(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) +INSTS(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) +INSTS(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) +INSTS(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) + +INSTS(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) +INSTS(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) +INSTS(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) +INSTS(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) +INSTS(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) +INSTS(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) +INSTS(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) +INSTS(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) + +////R_R_I. 
+INSTS(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) +INSTS(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) +INSTS(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) +INSTS(slti, "slti", 0, 0, IF_LA, 0x02000000) + +INSTS(sltui, "sltui", 0, 0, IF_LA, 0x02400000) +INSTS(andi, "andi", 0, 0, IF_LA, 0x03400000) +INSTS(ori, "ori", 0, 0, IF_LA, 0x03800000) +INSTS(xori, "xori", 0, 0, IF_LA, 0x03c00000) + +INSTS(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) +INSTS(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) +INSTS(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) +INSTS(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) +INSTS(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) +INSTS(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) +INSTS(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) +INSTS(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) + +INSTS(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) + +INSTS(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) + +////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////// +////NOTE: End +//// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). +//////////////////////////////////////////////// +////Store. 
+INSTS(st_b, "st.b", 0, ST, IF_LA, 0x29000000) +INSTS(st_h, "st.h", 0, ST, IF_LA, 0x29400000) +INSTS(st_w, "st.w", 0, ST, IF_LA, 0x29800000) +INSTS(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) + +INSTS(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) +INSTS(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) +INSTS(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) +INSTS(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) + +INSTS(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) +INSTS(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) +INSTS(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) +INSTS(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) +INSTS(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) +INSTS(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) +INSTS(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) +INSTS(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) +INSTS(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) +INSTS(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) +INSTS(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) +INSTS(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) + +INSTS(dbar, "dbar", 0, 0, IF_LA, 0x38720000) +INSTS(ibar, "ibar", 0, 0, IF_LA, 0x38728000) + +INSTS(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) +INSTS(break, "break", 0, 0, IF_LA, 0x002a0005) + +INSTS(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) +INSTS(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) + +INSTS(preld, "preld", 0, LD, IF_LA, 0x2ac00000) +INSTS(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) + +////Float instructions. +////R_R_R. 
+INSTS(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) +INSTS(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) +INSTS(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) +INSTS(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) +INSTS(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) +INSTS(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) +INSTS(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) +INSTS(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) + +INSTS(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) +INSTS(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) +INSTS(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) +INSTS(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) +INSTS(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) +INSTS(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) +INSTS(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) +INSTS(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) + +INSTS(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) +INSTS(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) + +INSTS(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) +INSTS(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) + +INSTS(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) +INSTS(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) +INSTS(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) +INSTS(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) + +INSTS(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) +INSTS(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) +INSTS(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) +INSTS(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) +INSTS(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) +INSTS(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) +INSTS(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) +INSTS(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) + +////R_R_R_R. 
+INSTS(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) +INSTS(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) +INSTS(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) +INSTS(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) +INSTS(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) +INSTS(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) +INSTS(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) +INSTS(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) + +////R_R. +INSTS(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) +INSTS(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) +INSTS(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) +INSTS(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) + +INSTS(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) +INSTS(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) +INSTS(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) +INSTS(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) +INSTS(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) +INSTS(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) +INSTS(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) +INSTS(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) +INSTS(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) +INSTS(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) + +INSTS(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) +INSTS(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) +INSTS(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) +INSTS(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) +INSTS(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) +INSTS(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) +INSTS(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) +INSTS(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) +INSTS(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) +INSTS(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) +INSTS(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) +INSTS(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) +INSTS(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) +INSTS(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) +INSTS(ftintrp_w_s, 
"ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) +INSTS(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) +INSTS(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) +INSTS(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) +INSTS(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) +INSTS(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) +INSTS(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) +INSTS(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) +INSTS(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) +INSTS(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) +INSTS(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) +INSTS(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) +INSTS(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) +INSTS(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) + +INSTS(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) +INSTS(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) + +INSTS(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) +INSTS(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) +INSTS(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) +INSTS(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) +INSTS(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) +INSTS(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) + +//// +INSTS(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) +INSTS(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) +INSTS(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) +INSTS(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) +INSTS(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) +INSTS(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) + +////R_R_I. 
+INSTS(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) +INSTS(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) +INSTS(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) +INSTS(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) +INSTS(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) +INSTS(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) +INSTS(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) +INSTS(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) +INSTS(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) +INSTS(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) +INSTS(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) + +INSTS(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) +INSTS(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) +INSTS(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) +INSTS(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) +INSTS(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) +INSTS(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) +INSTS(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) +INSTS(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) +INSTS(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) +INSTS(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) +INSTS(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) + +INSTS(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) +INSTS(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) +INSTS(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) +INSTS(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) +INSTS(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) +INSTS(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) +INSTS(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) +INSTS(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) +INSTS(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) +INSTS(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) +INSTS(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) + +INSTS(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 
0x0c108000) +INSTS(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) +INSTS(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) +INSTS(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) +INSTS(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) +INSTS(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) +INSTS(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) +INSTS(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) +INSTS(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) +INSTS(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) +INSTS(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) + +////R_R_I. +INSTS(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) +INSTS(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) +INSTS(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) +INSTS(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) + + +// clang-format on +/*****************************************************************************/ +#undef INSTS +/*****************************************************************************/ diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index d8cb5cabfb065..8f0ab9b4f7954 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -42,6 +42,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_X86 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_X86 and HOST_LOONGARCH64 +#endif #elif defined(HOST_AMD64) #if defined(HOST_X86) #error Cannot define both HOST_AMD64 and HOST_X86 @@ -52,6 +55,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_AMD64 and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_AMD64 and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM) #if defined(HOST_X86) #error Cannot define both HOST_ARM and HOST_X86 @@ -62,6 +68,9 @@ #if defined(HOST_ARM64) #error Cannot define both HOST_ARM and HOST_ARM64 #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM and HOST_LOONGARCH64 +#endif #elif defined(HOST_ARM64) #if defined(HOST_X86) 
#error Cannot define both HOST_ARM64 and HOST_X86 @@ -72,6 +81,22 @@ #if defined(HOST_ARM) #error Cannot define both HOST_ARM64 and HOST_ARM #endif +#if defined(HOST_LOONGARCH64) +#error Cannot define both HOST_ARM64 and HOST_LOONGARCH64 +#endif +#elif defined(HOST_LOONGARCH64) +#if defined(HOST_X86) +#error Cannot define both HOST_LOONGARCH64 and HOST_X86 +#endif +#if defined(HOST_AMD64) +#error Cannot define both HOST_LOONGARCH64 and HOST_AMD64 +#endif +#if defined(HOST_ARM) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM +#endif +#if defined(HOST_ARM64) +#error Cannot define both HOST_LOONGARCH64 and HOST_ARM64 +#endif #else #error Unsupported or unset host architecture #endif @@ -86,6 +111,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_X86 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_X86 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_AMD64) #if defined(TARGET_X86) #error Cannot define both TARGET_AMD64 and TARGET_X86 @@ -96,6 +124,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_AMD64 and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_AMD64 and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM and TARGET_X86 @@ -106,6 +137,9 @@ #if defined(TARGET_ARM64) #error Cannot define both TARGET_ARM and TARGET_ARM64 #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM and TARGET_LOONGARCH64 +#endif #elif defined(TARGET_ARM64) #if defined(TARGET_X86) #error Cannot define both TARGET_ARM64 and TARGET_X86 @@ -116,6 +150,22 @@ #if defined(TARGET_ARM) #error Cannot define both TARGET_ARM64 and TARGET_ARM #endif +#if defined(TARGET_LOONGARCH64) +#error Cannot define both TARGET_ARM64 and TARGET_LOONGARCH64 +#endif +#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_X86) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_X86 +#endif +#if 
defined(TARGET_AMD64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_AMD64 +#endif +#if defined(TARGET_ARM) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM +#endif +#if defined(TARGET_ARM64) +#error Cannot define both TARGET_LOONGARCH64 and TARGET_ARM64 +#endif #else #error Unsupported or unset target architecture #endif @@ -163,6 +213,8 @@ #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARMNT #elif defined(TARGET_ARM64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 +#elif defined(TARGET_LOONGARCH64) +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0xDD64 #else #error Unsupported or unset target architecture #endif @@ -207,6 +259,14 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +#define UNIX_LOONGARCH64_ONLY_ARG(x) , x +#define UNIX_LOONGARCH64_ONLY(x) x +#else // !TARGET_LOONGARCH64 +#define UNIX_LOONGARCH64_ONLY_ARG(x) +#define UNIX_LOONGARCH64_ONLY(x) +#endif // TARGET_LOONGARCH64 + #if defined(DEBUG) #define DEBUG_ARG_SLOTS #endif @@ -224,7 +284,7 @@ #define DEBUG_ARG_SLOTS_ASSERT(x) #endif -#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || !defined(TARGET_64BIT) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define FEATURE_PUT_STRUCT_ARG_STK 1 #endif @@ -236,7 +296,7 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #define MULTIREG_HAS_SECOND_GC_RET 1 #define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x #define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x @@ -249,7 +309,7 @@ // Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from // the official Arm64 ABI. 
// Case: splitting 16 byte struct between x7 and stack -#if defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64)/* || defined(TARGET_LOONGARCH64)*/ #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 5f9fa62008b30..7478529966657 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -343,6 +343,11 @@ CONFIG_INTEGER(EnableArm64Sm4, W("EnableArm64Sm4"), 1) CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) #endif // defined(TARGET_ARM64) +#if defined(TARGET_LOONGARCH64) +//TODO: should add LOONGARCH64's features here. +CONFIG_INTEGER(EnableHWIntrinsic, W("EnableHWIntrinsic"), 0) +#endif // defined(TARGET_LOONGARCH64) + // clang-format on #ifdef FEATURE_SIMD @@ -571,6 +576,18 @@ CONFIG_STRING(JitFunctionFile, W("JitFunctionFile")) // of the frame) CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSavedRegisters"), 0) #endif // defined(TARGET_ARM64) + +#if defined(TARGET_LOONGARCH64) +// JitSaveFpRaWithCalleeSavedRegisters: +// 0: use default frame type decision +// 1: disable frames that save FP/RA registers with the callee-saved registers (at the top of the frame) +// 2: force all frames to use the frame types that save FP/RA registers with the callee-saved registers (at the top +// of the frame) +CONFIG_INTEGER(JitSaveFpRaWithCalleeSavedRegisters, W("JitSaveFpRaWithCalleeSavedRegisters"), 0) + +// Disable emitDispIns by default +CONFIG_INTEGER(JitDispIns, W("JitDispIns"), 0) +#endif // defined(TARGET_LOONGARCH64) #endif // DEBUG CONFIG_INTEGER(JitEnregStructLocals, W("JitEnregStructLocals"), 1) // Allow to enregister locals with struct type. 
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 5cf2d7bfb8e68..be8f1ec1fb903 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -888,7 +888,7 @@ unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTr assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX); assert(ehGetDsc(finallyIndex)->HasFinallyHandler()); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion); #else *inTryRegion = true; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4d01e63ef13b6..2cd755c49c475 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -643,7 +643,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un LclVarDsc* varDsc = varDscInfo->varDsc; CORINFO_CLASS_HANDLE typeHnd = nullptr; +#if defined(TARGET_LOONGARCH64) + int flags = 0; + CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd, &flags); +#else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); +#endif varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -654,7 +659,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un lvaSetClass(varDscInfo->varNum, clsHnd); } - // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers + // For ARM, ARM64, LOONGARCH64, and AMD64 varargs, all arguments go in integer registers var_types argType = mangleVarArgsType(varDsc->TypeGet()); var_types origArgType = argType; @@ -808,6 +813,24 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } #else // !TARGET_ARM + +#if defined(TARGET_LOONGARCH64) + + if (compFeatureArgSplit()) 
+ { + // This does not affect the normal calling convention for LoongArch64!! + if (this->info.compIsVarArgs && argType == TYP_STRUCT) + { + if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register + !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register + { + cSlotsToEnregister = 1; // Force the split + } + } + } + +#endif // defined(TARGET_LOONGARCH64) + #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varTypeIsStruct(argType)) @@ -868,9 +891,72 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); } else +#elif defined(TARGET_LOONGARCH64) + var_types arg1_Type = TYP_UNKNOWN; + var_types arg2_Type = TYP_UNKNOWN; + if (flags & 0xf) + { + assert(varTypeIsStruct(argType)); + int float_num = 0; + if (flags == 1) + { + assert(argSize <= 8); + assert(varDsc->lvExactSize <= argSize); + float_num = 1; + + arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); + } + else if (flags & 0x8) + { + arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; + float_num = 2; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); + } + else if (flags & 2) + { + float_num = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (flags & 0x20) ? TYP_LONG : TYP_INT; + } + else if (flags & 4) + { + float_num = 1; + canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); + canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); + + arg1_Type = (flags & 0x10) ? TYP_LONG : TYP_INT; + arg2_Type = (flags & 0x20) ? 
TYP_DOUBLE : TYP_FLOAT; + } + + if (!canPassArgInRegisters) + { + assert(float_num > 0); + canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); + arg1_Type = TYP_UNKNOWN; + arg2_Type = TYP_UNKNOWN; + } + } + else #endif // defined(UNIX_AMD64_ABI) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); +#if defined(TARGET_LOONGARCH64) + if (!canPassArgInRegisters && varTypeIsFloating(argType)) + { + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); + argType = canPassArgInRegisters ? TYP_I_IMPL : argType; + } + if (!canPassArgInRegisters && (cSlots > 1)) + { + canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); + arg1_Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + } +#endif } if (canPassArgInRegisters) @@ -900,7 +986,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#endif // defined(UNIX_AMD64_ABI) +#elif defined(TARGET_LOONGARCH64) + if (arg1_Type != TYP_UNKNOWN) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1_Type, 1); + } + else +#endif // defined(TARGET_LOONGARCH64) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots); } @@ -948,6 +1040,40 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType)); } } +#elif defined(TARGET_LOONGARCH64) + if (argType == TYP_STRUCT) + { + if (arg1_Type != TYP_UNKNOWN) + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1_Type)); + varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1_Type) == 4 ? 1 : 0; + if (arg2_Type != TYP_UNKNOWN) + { + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2_Type, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2_Type)); + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 
1 : 0; + varDscInfo->hasMultiSlotStruct = true; + } + else if (cSlots > 1) + { + varDsc->lvIsSplit = 1; + //varDsc->lvFldOffset = 0; + varDsc->SetOtherArgReg(REG_STK); + varDscInfo->hasMultiSlotStruct = true; + varDscInfo->setAllRegArgUsed(arg1_Type); + varDscInfo->stackArgSize += TARGET_POINTER_SIZE; + } + } + else + { + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL)); + if (cSlots == 2) + { + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL)); + varDscInfo->hasMultiSlotStruct = true; + } + } + } #else // ARM32 if (varTypeIsStruct(argType)) { @@ -1064,6 +1190,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un // varDscInfo->setAllRegArgUsed(argType); +#elif defined(TARGET_LOONGARCH64) + + varDscInfo->setAllRegArgUsed(argType); + #endif // TARGET_XXX #if FEATURE_FASTTAILCALL @@ -1383,7 +1513,12 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #if defined(TARGET_AMD64) || defined(TARGET_ARM64) varDsc->lvIsImplicitByRef = 0; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_LOONGARCH64) + varDsc->lvIsImplicitByRef = 0; + varDsc->lvIs4Field1 = 0; + varDsc->lvIs4Field2 = 0; + varDsc->lvIsSplit = 0; +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Set the lvType (before this point it is TYP_UNDEF). @@ -2020,7 +2155,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) JITDUMP("Not promoting multi-reg returned struct local V%02u with holes.\n", lclNum); shouldPromote = false; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // TODO-PERF - Only do this when the LclVar is used in an argument context // TODO-ARM64 - HFA support should also eliminate the need for this. // TODO-ARM32 - HFA support should also eliminate the need for this. 
@@ -2037,7 +2172,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE @@ -2352,7 +2487,7 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) compiler->compLongUsed = true; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Reset the implicitByRef flag. fieldVarDsc->lvIsImplicitByRef = 0; @@ -2668,7 +2803,7 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc, bool isVarArg) return true; } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (howToPassStruct == SPK_ByValue) { assert(type == TYP_STRUCT); @@ -2709,7 +2844,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; varDsc->lvType = impNormStructType(typeHnd, &simdBaseJitType); -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Mark implicit byref struct parameters if (varDsc->lvIsParam && !varDsc->lvIsStructField) { @@ -2722,7 +2857,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool varDsc->lvIsImplicitByRef = 1; } } -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #if FEATURE_SIMD if (simdBaseJitType != CORINFO_TYPE_UNDEF) @@ -3702,20 +3837,20 @@ size_t LclVarDsc::lvArgStackSize() const #if defined(WINDOWS_AMD64_ABI) // Structs are either passed by reference or 
can be passed by value using one pointer stackSize = TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // lvSize performs a roundup. stackSize = this->lvSize(); -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) { // If the size is greater than 16 bytes then it will // be passed by reference. stackSize = TARGET_POINTER_SIZE; } -#endif // defined(TARGET_ARM64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) -#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI +#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 NYI("Unsupported target."); unreached(); @@ -5224,7 +5359,14 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#endif // TARGET_AMD64 +#elif defined(TARGET_LOONGARCH64) + else + { + // FP is used. + JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); + delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); + } +#endif // TARGET_LOONGARCH64 // For OSR, update the delta to reflect the current policy that // RBP points at the base of the new frame, and RSP is relative to that RBP. @@ -5287,6 +5429,11 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); +#if defined(TARGET_LOONGARCH64) + if (varDsc->GetStackOffset() >= delta) + varDsc->SetStackOffset(varDsc->GetStackOffset() + (varDsc->lvIsSplit ? 
8 : 0)); +#endif + #if DOUBLE_ALIGN if (genDoubleAlign() && !codeGen->isFramePointerUsed()) { @@ -5343,6 +5490,18 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } +#elif defined(TARGET_LOONGARCH64) + // We normally add alignment below the locals between them and the outgoing + // arg space area. When we store fp/ra at the bottom, however, this will be + // below the alignment. So we should not apply the alignment adjustment to + // them. On LOONGARCH64 it turns out we always store these at +0 and +8 of the FP, + // so instead of dealing with skipping adjustment just for them we just set + // them here always. + assert(codeGen->isFramePointerUsed()); + if (lvaRetAddrVar != BAD_VAR_NUM) + { + lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); + } #endif } @@ -5740,7 +5899,7 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, * when updating the current offset on the stack */ CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) #if DEBUG // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize // Also investigate why we are incrementing argOffs for X86 as this seems incorrect @@ -5848,6 +6007,18 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, varDsc->SetStackOffset(argOffs); argOffs += argSize; } + +#elif defined(TARGET_LOONGARCH64) + //if (compFeatureArgSplit() && this->info.compIsVarArgs) + //{//TODO: should confirm for "info.compIsVarArgs". + // if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) + // { + // // This is a split struct. It will account for an extra (8 bytes) + // // of alignment. 
+ // varDsc->lvStkOffs += TARGET_POINTER_SIZE; + // argOffs += TARGET_POINTER_SIZE; + // } + //} #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6075,6 +6246,30 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_XARCH +#ifdef TARGET_LOONGARCH64 + // Decide where to save FP and RA registers. We store FP/RA registers at the bottom of the frame if there is + // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we + // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value, + // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the + // frame types. Since saving FP/RA at high addresses is a relatively rare case, force using it during stress. + // (It should be legal to use these frame types for every frame). + + if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 0) + { + // Default configuration + codeGen->SetSaveFpRaWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || + compStressCompile(STRESS_GENERIC_VARN, 20)); + } + else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 1) + { + codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(false); // Disable using new frames + } + else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 2) + { + codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(true); // Force using new frames + } +#endif // TARGET_LOONGARCH64 + int preSpillSize = 0; bool mustDoubleAlign = false; @@ -6116,7 +6311,29 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; } -#else // !TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + + int initialStkOffs = 0; + if (info.compIsVarArgs) + { + // For varargs we always save all of the integer register arguments + // so that they are contiguous with the incoming stack arguments. 
+ initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES; + stkOffs -= initialStkOffs; + } + if (codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() || + !isFramePointerUsed()) // Note that currently we always have a frame pointer + { + stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; + } + else + { + // Subtract off FP and RA. + assert(compCalleeRegsPushed >= 2); + stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; + } + +#else // !TARGET_LOONGARCH64 #ifdef TARGET_ARM // On ARM32 LR is part of the pushed registers and is always stored at the // top. @@ -6127,7 +6344,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; -#endif // !TARGET_ARM64 +#endif // !TARGET_LOONGARCH64 compLclFrameSize = 0; @@ -6192,6 +6409,17 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // FEATURE_EH_FUNCLETS && defined(TARGET_ARMARCH) +#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_LOONGARCH64) + if (lvaPSPSym != BAD_VAR_NUM) + { + // If we need a PSPSym, allocate it first, before anything else, including + // padding (so we can avoid computing the same padding in the funclet + // frame). Note that there is no special padding requirement for the PSPSym. + noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer + stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); + } +#endif // FEATURE_EH_FUNCLETS || TARGET_LOONGARCH64 + if (mustDoubleAlign) { if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) @@ -6591,6 +6819,15 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif +#ifdef TARGET_LOONGARCH64 + if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) + {//TODO: add VarArgs for LOONGARCH64. + // Stack offset to parameters should point to home area which will be preallocated. 
+ varDsc->SetStackOffset(-initialStkOffs + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); + continue; + } +#endif + #endif // !TARGET_AMD64 } @@ -6700,6 +6937,19 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM #endif // TARGET_ARM64 + +#ifdef TARGET_LOONGARCH64 + // If we have an incoming register argument that has a struct promoted field + // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar + // + if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct()) + { + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here + + unsigned fieldVarNum = varDsc->lvFieldLclStart; + lvaTable[fieldVarNum].SetStackOffset(varDsc->GetStackOffset()); + } +#endif // TARGET_LOONGARCH64 } } @@ -6804,6 +7054,15 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + if (!codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() && + isFramePointerUsed()) // Note that currently we always have a frame pointer + { + // Create space for saving FP and RA. + stkOffs -= 2 * REGSIZE_BYTES; + } +#endif // TARGET_LOONGARCH64 + #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) { @@ -6839,6 +7098,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() pushedCount += 1; // pushed PC (return address) #endif +#ifdef TARGET_LOONGARCH64 + if (info.compIsVarArgs) + { + pushedCount += MAX_REG_ARG; + } +#endif + noway_assert(compLclFrameSize + originalFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE))); } @@ -7092,6 +7358,34 @@ void Compiler::lvaAlignFrame() } } +#elif defined(TARGET_LOONGARCH64) + + // First, align up to 8. 
+ if ((compLclFrameSize % 8) != 0) + { + lvaIncrementFrameSize(8 - (compLclFrameSize % 8)); + } + else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) + { + // If we are not doing final layout, we don't know the exact value of compLclFrameSize + // and thus do not know how much we will need to add in order to be aligned. + // We add 8 so compLclFrameSize is still a multiple of 8. + lvaIncrementFrameSize(8); + } + assert((compLclFrameSize % 8) == 0); + + // Ensure that the stack is always 16-byte aligned by grabbing an unused 16-byte + // if needed. + bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) != 0; + bool lclFrameSizeAligned = (compLclFrameSize % 16) != 0; + + // If this isn't the final frame layout, assume we have to push an extra QWORD + // Just so the offsets are true upper limits. + if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned)) + { + lvaIncrementFrameSize(REGSIZE_BYTES); + } + #else NYI("TARGET specific lvaAlignFrame"); #endif // !TARGET_AMD64 @@ -7666,6 +7960,11 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) { compCalleeFPRegsSavedMask = RBM_NONE; } +#elif defined(TARGET_LOONGARCH64) + if (compFloatingPointUsed) + compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; + + compCalleeRegsPushed++; // we always push RA. See genPushCalleeSavedRegisters #endif #if DOUBLE_ALIGN @@ -7696,6 +7995,14 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters #endif +#if defined(TARGET_LOONGARCH64) + if (compFloatingPointUsed) + { + calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; + } + calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push RA. 
See genPushCalleeSavedRegisters +#endif + result = compLclFrameSize + calleeSavedRegMaxSz; return result; } @@ -7988,6 +8295,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* padding = roundUp(padding, alignment); #endif // TARGET_ARMARCH +#ifdef TARGET_LOONGARCH64 + unsigned alignment = 1; + pComp->codeGen->InferOpSizeAlign(lcl, &alignment); + alignment = roundUp(alignment, TARGET_POINTER_SIZE); + padding = roundUp(padding, alignment); +#endif // TARGET_LOONGARCH64 + // Change the variable to a TYP_BLK if (varType != TYP_BLK) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2467a00accdaa..c484c0fd0c9b1 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -197,7 +197,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCast(node); break; -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: @@ -230,7 +230,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_LSH: case GT_RSH: case GT_RSZ: -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) LowerShift(node->AsOp()); #else ContainCheckShiftRotate(node->AsOp()); @@ -310,7 +310,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); break; @@ -338,7 +338,7 @@ GenTree* Lowering::LowerNode(GenTree* node) break; #endif -#ifndef TARGET_ARMARCH +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARMARCH-CQ: We should contain this as long as the offset fits. 
case GT_OBJ: if (node->AsObj()->Addr()->OperIsLocalAddr()) @@ -1040,6 +1040,15 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf MakeSrcContained(arg, arg->AsObj()->Addr()); } } +#elif defined(TARGET_LOONGARCH64) + if (type == TYP_STRUCT) + { + arg->SetContained(); + if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) + { + MakeSrcContained(arg, arg->AsObj()->Addr()); + } + } #endif #if FEATURE_ARG_SPLIT @@ -1423,6 +1432,30 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) } #endif // TARGET_ARMARCH +#if defined(TARGET_LOONGARCH64) + if (call->IsVarargs() /*|| comp->opts.compUseSoftFP*/) + { + // For vararg call or on armel, reg args should be all integer. + // Insert copies as needed to move float value to integer register. + GenTree* newNode = LowerFloatArg(ppArg, info); + if (newNode != nullptr) + { + type = newNode->TypeGet(); + } + } + else + { + GenTree* putArg = NewPutArg(call, arg, info, type); + + // In the case of register passable struct (in one or two registers) + // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) + // If an extra node is returned, splice it in the right place in the tree. + if (arg != putArg) + { + ReplaceArgWithPutArgOrBitcast(ppArg, putArg); + } + } +#else GenTree* putArg = NewPutArg(call, arg, info, type); // In the case of register passable struct (in one or two registers) @@ -1432,10 +1465,11 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) { ReplaceArgWithPutArgOrBitcast(ppArg, putArg); } +#endif } } -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ // LowerFloatArg: Lower float call arguments on the arm platform. 
// @@ -2498,7 +2532,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) { assert(cmp->gtGetOp2()->IsIntegralConst()); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64)// || defined(TARGET_LOONGARCH64) + ////TODO: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); @@ -2867,7 +2902,53 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#endif // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + GenTree* relop = jtrue->gtGetOp1(); + GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); + GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); + + if (relopOp1->IsCnsIntOrI() && relopOp2->IsCnsIntOrI()) + { + relopOp1->SetContained(); + relopOp2->SetContained(); + } + else if (relop->gtNext == jtrue) + { + if (relopOp2->IsCnsIntOrI()) + { + if (relop->OperIs(GT_EQ, GT_NE)) + { + + // Codegen will use beq or bne in codegen. + GenTreeFlags flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : GTF_EMPTY; + + relop->SetOper(GT_JCMP); + relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ); + relop->gtFlags |= flags; + relop->gtType = TYP_VOID; + + relopOp2->SetContained(); + + BlockRange().Remove(jtrue); + + assert(relop->gtNext == nullptr); + return nullptr; + } + } + else if (relopOp1->IsCnsIntOrI()) + { + relopOp1->SetContained(); + } + } + else if (relopOp1->IsCnsIntOrI()) + { + relopOp1->SetContained(); + } + else if (relopOp2->IsCnsIntOrI()) + { + relopOp2->SetContained(); + } +#endif // TARGET_LOONGARCH64 ContainCheckJTrue(jtrue); @@ -5106,7 +5187,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } -#ifndef TARGET_ARMARCH +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) if (BlockRange().TryGetUse(node, &use)) { // If this is a child of an indir, let the parent handle it. 
@@ -5117,7 +5198,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) TryCreateAddrMode(node, false); } } -#endif // !TARGET_ARMARCH +#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 } if (node->OperIs(GT_ADD)) @@ -5283,7 +5364,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32 -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (!comp->opts.MinOpts() && (divisorValue >= 3)) { size_t magic; @@ -5363,7 +5444,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) GenTree* firstNode = nullptr; GenTree* adjustedDividend = dividend; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one. bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul; #else @@ -5417,7 +5498,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) //|| defined(TARGET_LOONGARCH64) // 64-bit MUL is more expensive than UMULL on ARM64. genTreeOps mulOper = simpleMul ? 
GT_MUL_LONG : GT_MULHI; #else @@ -5509,7 +5590,11 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) GenTree* dividend = divMod->gtGetOp1(); GenTree* divisor = divMod->gtGetOp2(); +#if defined(TARGET_LOONGARCH64) + const var_types type = genActualType(divMod->TypeGet()); +#else const var_types type = divMod->TypeGet(); +#endif assert((type == TYP_INT) || (type == TYP_LONG)); #if defined(USE_HELPERS_FOR_INT_DIV) @@ -5571,7 +5656,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) return nullptr; } -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) ssize_t magic; int shift; diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index ed0ecc5661970..d5835f183e3ba 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -157,7 +157,7 @@ class Lowering final : public Phase void ReplaceArgWithPutArgOrBitcast(GenTree** ppChild, GenTree* newNode); GenTree* NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type); void LowerArg(GenTreeCall* call, GenTree** ppTree); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) GenTree* LowerFloatArg(GenTree** pArg, fgArgTabEntry* info); GenTree* LowerFloatArgReg(GenTree* arg, regNumber regNum); #endif diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp new file mode 100644 index 0000000000000..0b77c8a27d3b8 --- /dev/null +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -0,0 +1,1693 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Lowering for LOONGARCH64 common code XX +XX XX +XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX +XX architectures. For a more detailed view of what is lowering, please XX +XX take a look at Lower.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 // This file is ONLY used for LOONGARCH64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +#endif + +//------------------------------------------------------------------------ +// IsCallTargetInRange: Can a call target address be encoded in-place? +// +// Return Value: +// True if the addr fits into the range. +// +bool Lowering::IsCallTargetInRange(void* addr) +{ + ////TODO for LOONGARCH64: should amend for optimize! + //assert(!"unimplemented on LOONGARCH yet"); + //return comp->codeGen->validImmForBAL((ssize_t)addr); + return false; +} + +//------------------------------------------------------------------------ +// IsContainableImmed: Is an immediate encodable in-place? +// +// Return Value: +// True if the immediate can be folded into an instruction, +// for example small enough and non-relocatable. 
+// +bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const +{ + if (!varTypeIsFloating(parentNode->TypeGet())) + { + // Make sure we have an actual immediate + if (!childNode->IsCnsIntOrI()) + return false; + if (childNode->AsIntCon()->ImmedValNeedsReloc(comp)) + return false; + + // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. + target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; + emitAttr attr = emitActualTypeSize(childNode->TypeGet()); + emitAttr size = EA_SIZE(attr); + + switch (parentNode->OperGet()) + { + case GT_ADD: + return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false + : ((-2048 <= immVal) && (immVal <= 2047)); + break; + case GT_CMPXCHG: + case GT_LOCKADD: + case GT_XADD: + assert(!"unimplemented on LOONGARCH yet"); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif + return ((-32768 <= immVal) && (immVal <= 32767)); + case GT_AND: + case GT_OR: + case GT_XOR: + return ((-2048 <= immVal) && (immVal <= 2047)); + case GT_JCMP: + assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); + return true; + + case GT_STORE_LCL_FLD: + case GT_STORE_LCL_VAR: + if (immVal == 0) + return true; + break; + + default: + break; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. +// +// TODO: For LoongArch64 recognized GT_MULs that can be turned into GT_MUL_LONGs, as +// those are cheaper. Performs contaiment checks. +// +// Arguments: +// mul - The node to lower +// +// Return Value: +// The next node to lower. 
+// +GenTree* Lowering::LowerMul(GenTreeOp* mul) +{ + assert(mul->OperIsMul()); + + //if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) + //{ + // GenTreeCast* op1 = mul->gtGetOp1()->AsCast(); + // GenTree* op2 = mul->gtGetOp2(); + + // mul->ClearOverflow(); + // mul->ClearUnsigned(); + // if (op1->IsUnsigned()) + // { + // mul->SetUnsigned(); + // } + + // mul->gtOp1 = op1->CastOp(); + // BlockRange().Remove(op1); + + // if (op2->OperIs(GT_CAST)) + // { + // mul->gtOp2 = op2->AsCast()->CastOp(); + // BlockRange().Remove(op2); + // } + // else + // { + // assert(op2->IsIntegralConst()); + // assert(FitsIn(op2->AsIntConCommon()->IntegralValue())); + + // op2->ChangeType(TYP_INT); + // } + + // mul->ChangeOper(GT_MUL_LONG); + //} + + ContainCheckMul(mul); + + return mul->gtNext; +} + +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + // Try to widen the ops if they are going into a local var. 
+ GenTree* op1 = storeLoc->gtGetOp1(); + if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) + { + GenTreeIntCon* con = op1->AsIntCon(); + ssize_t ival = con->gtIconVal; + unsigned varNum = storeLoc->GetLclNum(); + LclVarDsc* varDsc = comp->lvaGetDesc(varNum); + + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); + // If we are storing a constant into a local variable + // we extend the size of the store here + if ((size < 4) && !varTypeIsStruct(varDsc)) + { + if (!varTypeIsUnsigned(varDsc)) + { + if (genTypeSize(storeLoc) == 1) + { + if ((ival & 0x7f) != ival) + { + ival = ival | 0xffffff00; + } + } + else + { + assert(genTypeSize(storeLoc) == 2); + if ((ival & 0x7fff) != ival) + { + ival = ival | 0xffff0000; + } + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + // TODO-CQ: if the field is promoted shouldn't we also be able to do this? + if (!varDsc->lvIsStructField) + { + storeLoc->gtType = TYP_INT; + con->SetIconValue(ival); + } + } + } + if (storeLoc->OperIs(GT_STORE_LCL_FLD)) + { + // We should only encounter this for lclVars that are lvDoNotEnregister. + verifyLclFldDoNotEnregister(storeLoc->GetLclNum()); + } + ContainCheckStoreLoc(storeLoc); +} + +//------------------------------------------------------------------------ +// LowerStoreIndir: Determine addressing mode for an indirection, and whether operands are contained. +// +// Arguments: +// node - The indirect store node (GT_STORE_IND) of interest +// +// Return Value: +// None. 
+// +void Lowering::LowerStoreIndir(GenTreeStoreInd* node) +{ + ContainCheckStoreIndir(node); +} + +//------------------------------------------------------------------------ +// LowerBlockStore: Set block store type +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. +// +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + blkNode->SetOper(GT_STORE_BLK); + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + // The fill value of an initblk is interpreted to hold a + // value of (unsigned int8) however a constant of any size + // may practically reside on the evaluation stack. So extract + // the lower byte out of the initVal constant and replicate + // it to a larger constant whose size is sufficient to support + // the largest width store of the desired inline expansion. + + ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; + if (fill == 0) + { + src->SetContained();; + } + else if (size >= REGSIZE_BYTES) + { + fill *= 0x0101010101010101LL; + src->gtType = TYP_LONG; + } + else + { + fill *= 0x01010101; + } + src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } + else + { + assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD)); + src->SetContained(); + + if (src->OperIs(GT_IND)) + { + // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained. 
+ // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the + // address, not knowing that GT_IND is part of a block op that has containment restrictions. + src->AsIndir()->Addr()->ClearContained(); + } + else if (src->OperIs(GT_LCL_VAR)) + { + // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register. + const unsigned srcLclNum = src->AsLclVar()->GetLclNum(); + comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp)); + } + if (blkNode->OperIs(GT_STORE_OBJ)) + { + if (!blkNode->AsObj()->GetLayout()->HasGCPtr()) + { + blkNode->SetOper(GT_STORE_BLK); + } + else if (dstAddr->OperIsLocalAddr() && (size <= CPBLK_UNROLL_LIMIT)) + { + // If the size is small enough to unroll then we need to mark the block as non-interruptible + // to actually allow unrolling. The generated code does not report GC references loaded in the + // temporary register(s) used for copying. + blkNode->SetOper(GT_STORE_BLK); + blkNode->gtBlkOpGcUnsafe = true; + } + } + + // CopyObj or CopyBlk + if (blkNode->OperIs(GT_STORE_OBJ)) + { + assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } +//////////////////////////////////////////////////////////////////////////////////////////////////////// + else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + + if (src->OperIs(GT_IND)) + { + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr()); + } + + ContainBlockStoreAddress(blkNode, size, dstAddr); + } + else + { + assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + + } +} + +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. 
+// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. +// +// Notes: +// Casts from float/double to a smaller int type are transformed as follows: +// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) +// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) +// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) +// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) +// +// Note that for the overflow conversions we still depend on helper calls and +// don't expect to see them here. +// i) GT_CAST(float/double, int type with overflow detection) +// + +void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr) +{ + assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); + assert(size < INT32_MAX); + + if (addr->OperIsLocalAddr()) + { + addr->SetContained(); + return; + } + + if (!addr->OperIs(GT_ADD) || addr->gtOverflow() || !addr->AsOp()->gtGetOp2()->OperIs(GT_CNS_INT)) + { + return; + } + + GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon(); + ssize_t offset = offsetNode->IconValue(); + + // All integer load/store instructions on both ARM32 and ARM64 support + // offsets in range -255..255. Of course, this is a rather conservative + // check. For example, if the offset and size are a multiple of 8 we + // could allow a combined offset of up to 32760 on ARM64. 
+ if ((offset < -255) || (offset > 255) || (offset + static_cast(size) > 256)) + { + return; + } + + if (!IsSafeToContainMem(blkNode, addr)) + { + return; + } + + BlockRange().Remove(offsetNode); + + addr->ChangeOper(GT_LEA); + addr->AsAddrMode()->SetIndex(nullptr); + addr->AsAddrMode()->SetScale(0); + addr->AsAddrMode()->SetOffset(static_cast(offset)); + addr->SetContained(); +} + + + + +void Lowering::LowerCast(GenTree* tree) +{ + assert(tree->OperGet() == GT_CAST); + + JITDUMP("LowerCast for: "); + DISPNODE(tree); + JITDUMP("\n"); + + GenTree* op1 = tree->AsOp()->gtOp1; + var_types dstType = tree->CastToType(); + var_types srcType = genActualType(op1->TypeGet()); + var_types tmpType = TYP_UNDEF; + + if (varTypeIsFloating(srcType)) + { + noway_assert(!tree->gtOverflow()); + assert(!varTypeIsSmall(dstType)); // fgMorphCast creates intermediate casts when converting from float to small + // int. + } + + assert(!varTypeIsSmall(srcType)); + + if (tmpType != TYP_UNDEF) + { + GenTree* tmp = comp->gtNewCastNode(tmpType, op1, tree->IsUnsigned(), tmpType); + tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); + + tree->gtFlags &= ~GTF_UNSIGNED; + tree->AsOp()->gtOp1 = tmp; + BlockRange().InsertAfter(op1, tmp); + } + + // Now determine if we have operands that should be contained. + ContainCheckCast(tree->AsCast()); +} + +//------------------------------------------------------------------------ +// LowerRotate: Lower GT_ROL and GT_ROR nodes. +// +// Arguments: +// tree - the node to lower +// +// Return Value: +// None. +// +void Lowering::LowerRotate(GenTree* tree) +{ + if (tree->OperGet() == GT_ROL) + { + // Convert ROL into ROR. 
+ GenTree* rotatedValue = tree->AsOp()->gtOp1; + unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; + GenTree* rotateLeftIndexNode = tree->AsOp()->gtOp2; + + if (rotateLeftIndexNode->IsCnsIntOrI()) + { + ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + rotateLeftIndexNode->AsIntCon()->gtIconVal = rotateRightIndex; + } + else + { + GenTree* tmp = comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); + BlockRange().InsertAfter(rotateLeftIndexNode, tmp); + tree->AsOp()->gtOp2 = tmp; + } + tree->ChangeOper(GT_ROR); + } + ContainCheckShiftRotate(tree->AsOp()); +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// Lowering::LowerSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::LowerSIMD(GenTreeSIMD* simdNode) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + assert(simdNode->gtType != TYP_SIMD32); + + if (simdNode->TypeGet() == TYP_SIMD12) + { + // GT_SIMD node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + simdNode->gtType = TYP_SIMD16; + } + + ContainCheckSIMD(simdNode); +#endif +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + auto intrinsicID = node->gtHWIntrinsicId; + auto intrinsicInfo = HWIntrinsicInfo::lookup(node->gtHWIntrinsicId); + + // + // Lower unsupported Unsigned Compare Zero intrinsics to their trivial transformations + // + // LOONGARCH64 does not support most forms of compare zero for Unsigned values + // This is because some are non-sensical, and the rest are trivial transformations of other operators + // + if ((intrinsicInfo.flags & HWIntrinsicInfo::LowerCmpUZero) && varTypeIsUnsigned(node->gtSIMDBaseType)) + { + auto setAllVector = node->gtSIMDSize > 8 ? NI_LOONGARCH64_SIMD_SetAllVector128 : NI_LOONGARCH64_SIMD_SetAllVector64; + + auto origOp1 = node->gtOp.gtOp1; + + switch (intrinsicID) + { + case NI_LOONGARCH64_SIMD_GT_ZERO: + // Unsigned > 0 ==> !(Unsigned == 0) + node->gtOp.gtOp1 = + comp->gtNewSimdHWIntrinsicNode(node->TypeGet(), node->gtOp.gtOp1, NI_LOONGARCH64_SIMD_EQ_ZERO, + node->gtSIMDBaseType, node->gtSIMDSize); + node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_BitwiseNot; + BlockRange().InsertBefore(node, node->gtOp.gtOp1); + break; + case NI_LOONGARCH64_SIMD_LE_ZERO: + // Unsigned <= 0 ==> Unsigned == 0 + node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_EQ_ZERO; + break; + case NI_LOONGARCH64_SIMD_GE_ZERO: + case NI_LOONGARCH64_SIMD_LT_ZERO: + // Unsigned >= 0 ==> Always true + // Unsigned < 0 ==> Always false + node->gtHWIntrinsicId = setAllVector; + node->gtOp.gtOp1 = comp->gtNewLconNode((intrinsicID == NI_LOONGARCH64_SIMD_GE_ZERO) ? 
~0ULL : 0ULL); + BlockRange().InsertBefore(node, node->gtOp.gtOp1); + if ((origOp1->gtFlags & GTF_ALL_EFFECT) == 0) + { + BlockRange().Remove(origOp1, true); + } + else + { + origOp1->SetUnusedValue(); + } + break; + default: + assert(!"Unhandled LowerCmpUZero case"); + } + } + + ContainCheckHWIntrinsic(node); +#endif +} + +//---------------------------------------------------------------------------------------------- +// Lowering::IsValidConstForMovImm: Determines if the given node can be replaced by a mov/fmov immediate instruction +// +// Arguments: +// node - The hardware intrinsic node. +// +// Returns: +// true if the node can be replaced by a mov/fmov immediate instruction; otherwise, false +// +// IMPORTANT: +// This check may end up modifying node->gtOp1 if it is a cast node that can be removed +bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) +{ + assert((node->gtHWIntrinsicId == NI_Vector64_Create) || (node->gtHWIntrinsicId == NI_Vector128_Create) || + (node->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe) || + (node->gtHWIntrinsicId == NI_Vector128_CreateScalarUnsafe) || + (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector64) || + (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector128) || + (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || + (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector128)); + assert(HWIntrinsicInfo::lookupNumArgs(node) == 1); + + GenTree* op1 = node->gtOp1; + GenTree* castOp = nullptr; + + if (varTypeIsIntegral(node->GetSimdBaseType()) && op1->OperIs(GT_CAST)) + { + // We will sometimes get a cast around a constant value (such as for + // certain long constants) which would block the below containment. + // So we will temporarily check what the cast is from instead so we + // can catch those cases as well. 
+ + castOp = op1->AsCast()->CastOp(); + op1 = castOp; + } + + if (op1->IsCnsIntOrI()) + { + const ssize_t dataValue = op1->AsIntCon()->gtIconVal; + + if (comp->GetEmitter()->emitIns_valid_imm_for_movi(dataValue, emitActualTypeSize(node->GetSimdBaseType()))) + { + if (castOp != nullptr) + { + // We found a containable immediate under + // a cast, so remove the cast from the LIR. + + BlockRange().Remove(node->gtOp1); + node->gtOp1 = op1; + } + return true; + } + } + else if (op1->IsCnsFltOrDbl()) + { + assert(varTypeIsFloating(node->GetSimdBaseType())); + assert(castOp == nullptr); + + const double dataValue = op1->AsDblCon()->gtDconVal; + return comp->GetEmitter()->emitIns_valid_imm_for_fmov(dataValue); + } + + return false; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. +// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || + (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + assert(simdSize != 0); + assert(node->gtType == TYP_BOOL); + assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); + + // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): + // /--* op2 simd + // /--* op1 simd + // node = * HWINTRINSIC simd T op_Equality + + 
GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + NamedIntrinsic cmpIntrinsic; + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + { + cmpIntrinsic = NI_AdvSimd_CompareEqual; + break; + } + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + { + cmpIntrinsic = NI_AdvSimd_Arm64_CompareEqual; + break; + } + + default: + { + unreached(); + } + } + + GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, simdBaseJitType, simdSize); + BlockRange().InsertBefore(node, cmp); + LowerNode(cmp); + + if ((simdBaseType == TYP_FLOAT) && (simdSize == 12)) + { + // For TYP_SIMD12 we don't want the upper bits to participate in the comparison. So, we will insert all ones + // into those bits of the result, "as if" the upper bits are equal. Then if all lower bits are equal, we get the + // expected all-ones result, and will get the expected 0's only where there are non-matching bits. 
+ + GenTree* idxCns = comp->gtNewIconNode(3, TYP_INT); + BlockRange().InsertAfter(cmp, idxCns); + + GenTree* insCns = comp->gtNewIconNode(-1, TYP_INT); + BlockRange().InsertAfter(idxCns, insCns); + + GenTree* tmp = comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, + CORINFO_TYPE_INT, simdSize); + BlockRange().InsertAfter(insCns, tmp); + LowerNode(tmp); + + cmp = tmp; + } + + GenTree* msk = + comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, CORINFO_TYPE_UBYTE, simdSize); + BlockRange().InsertAfter(cmp, msk); + LowerNode(msk); + + GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(msk, zroCns); + + GenTree* val = + comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_UBYTE, simdSize); + BlockRange().InsertAfter(zroCns, val); + LowerNode(val); + + zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(val, zroCns); + + node->ChangeOper(cmpOp); + + node->gtType = TYP_INT; + node->gtOp1 = val; + node->gtOp2 = zroCns; + + // The CompareEqual will set (condition is true) or clear (condition is false) all bits of the respective element + // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) + // So, we need to invert the condition from the operation since we compare against zero + + GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::NE : GenCondition::EQ; + GenTree* cc = LowerNodeCC(node, cmpCnd); + + node->gtType = TYP_VOID; + node->ClearUnusedValue(); + + LowerNode(node); +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call +// +// Arguments: +// node - The hardware intrinsic node. 
+// +void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types simdType = node->gtType; + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + VectorConstant vecCns = {}; + + if ((simdSize == 8) && (simdType == TYP_DOUBLE)) + { + // TODO-Cleanup: Struct retyping means we have the wrong type here. We need to + // manually fix it up so the simdType checks below are correct. + simdType = TYP_SIMD8; + } + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + assert(simdSize != 0); + + GenTreeArgList* argList = nullptr; + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + // Spare GenTrees to be used for the lowering logic below + // Defined upfront to avoid naming conflicts, etc... + GenTree* idx = nullptr; + GenTree* tmp1 = nullptr; + GenTree* tmp2 = nullptr; + GenTree* tmp3 = nullptr; + + assert(op1 != nullptr); + + unsigned argCnt = 0; + unsigned cnsArgCnt = 0; + + if (op1->OperIsList()) + { + assert(op2 == nullptr); + + for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + { + if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, simdBaseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + } + } + else + { + if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, simdBaseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + + if (op2 != nullptr) + { + if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, simdBaseType)) + { + cnsArgCnt += 1; + } + argCnt += 1; + } + else if (cnsArgCnt == 1) + { + // These intrinsics are meant to set the same value to every element + // so we'll just specially handle it here and copy it into the remaining + // indices. 
+ + for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) + { + HandleArgForHWIntrinsicCreate(op1, i, vecCns, simdBaseType); + } + } + } + assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); + + if ((argCnt == cnsArgCnt) && (argCnt == 1)) + { + GenTree* castOp = nullptr; + + if (varTypeIsIntegral(simdBaseType) && op1->OperIs(GT_CAST)) + { + // We will sometimes get a cast around a constant value (such as for + // certain long constants) which would block the below containment. + // So we will temporarily check what the cast is from instead so we + // can catch those cases as well. + + castOp = op1->AsCast()->CastOp(); + op1 = castOp; + } + + if (IsValidConstForMovImm(node)) + { + // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector + // intrinsic, which will itself mark the node as contained. + cnsArgCnt = 0; + + // Reacquire op1 as the above check may have removed a cast node and + // changed op1. + op1 = node->gtOp1; + } + } + + if (argCnt == cnsArgCnt) + { + if (op1->OperIsList()) + { + for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + { + BlockRange().Remove(argList->Current()); + } + } + else + { + BlockRange().Remove(op1); + + if (op2 != nullptr) + { + BlockRange().Remove(op2); + } + } + + assert((simdSize == 8) || (simdSize == 16)); + + if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8)) + { + // If we are a single constant or if all parts are the same, we might be able to optimize + // this even further for certain values, such as Zero or AllBitsSet. + + if (vecCns.i64[0] == 0) + { + node->gtOp1 = nullptr; + node->gtOp2 = nullptr; + node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero; + return; + } + else if (vecCns.i64[0] == -1) + { + node->gtOp1 = nullptr; + node->gtOp2 = nullptr; + node->gtHWIntrinsicId = (simdSize == 8) ? 
NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet; + return; + } + } + + unsigned cnsSize = (simdSize == 12) ? 16 : simdSize; + unsigned cnsAlign = cnsSize; + var_types dataType = Compiler::getSIMDTypeForSize(simdSize); + + UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType); + CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum); + GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd, nullptr); + BlockRange().InsertBefore(node, clsVarAddr); + + node->ChangeOper(GT_IND); + node->gtOp1 = clsVarAddr; + + // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial + // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just + // insert the non-constant values which should still allow some gains. + + return; + } + else if (argCnt == 1) + { + // We have the following (where simd is simd8 or simd16): + // /--* op1 T + // node = * HWINTRINSIC simd T Create + + // We will be constructing the following parts: + // /--* op1 T + // node = * HWINTRINSIC simd T DuplicateToVector + + // This is roughly the following managed code: + // return AdvSimd.Arm64.DuplicateToVector(op1); + + if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) + { + node->gtHWIntrinsicId = + (simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 : NI_AdvSimd_Arm64_DuplicateToVector128; + } + else + { + node->gtHWIntrinsicId = + (simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128; + } + return; + } + + // We have the following (where simd is simd8 or simd16): + // /--* op1 T + // +--* ... 
T + // +--* opN T + // node = * HWINTRINSIC simd T Create + + if (op1->OperIsList()) + { + argList = op1->AsArgList(); + op1 = argList->Current(); + argList = argList->Rest(); + } + + // We will be constructing the following parts: + // /--* op1 T + // tmp1 = * HWINTRINSIC simd8 T CreateScalarUnsafe + // ... + + // This is roughly the following managed code: + // var tmp1 = Vector64.CreateScalarUnsafe(op1); + // ... + + NamedIntrinsic createScalarUnsafe = + (simdType == TYP_SIMD8) ? NI_Vector64_CreateScalarUnsafe : NI_Vector128_CreateScalarUnsafe; + + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, createScalarUnsafe, simdBaseJitType, simdSize); + BlockRange().InsertAfter(op1, tmp1); + LowerNode(tmp1); + + unsigned N = 0; + GenTree* opN = nullptr; + + for (N = 1; N < argCnt - 1; N++) + { + // We will be constructing the following parts: + // ... + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // tmp1 = * HWINTRINSIC simd T Insert + // ... + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + + opN = argList->Current(); + + idx = comp->gtNewIconNode(N, TYP_INT); + BlockRange().InsertBefore(opN, idx); + + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); + BlockRange().InsertAfter(opN, tmp1); + LowerNode(tmp1); + + argList = argList->Rest(); + } + + assert(N == (argCnt - 1)); + + // We will be constructing the following parts: + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // node = * HWINTRINSIC simd T Insert + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + + opN = (argCnt == 2) ? 
op2 : argList->Current(); + + idx = comp->gtNewIconNode(N, TYP_INT); + BlockRange().InsertBefore(opN, idx); + + node->gtOp1 = comp->gtNewArgList(tmp1, idx, opN); + node->gtOp2 = nullptr; + + node->gtHWIntrinsicId = NI_AdvSimd_Insert; +} + +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicDot: Lowers a Vector64 or Vector128 Dot call +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + unsigned simdSize = node->GetSimdSize(); + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector64_Dot) || (intrinsicId == NI_Vector128_Dot)); + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + assert(simdSize != 0); + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + assert(op1 != nullptr); + assert(op2 != nullptr); + assert(!op1->OperIsList()); + + // Spare GenTrees to be used for the lowering logic below + // Defined upfront to avoid naming conflicts, etc... + GenTree* idx = nullptr; + GenTree* tmp1 = nullptr; + GenTree* tmp2 = nullptr; + + if (simdSize == 12) + { + assert(simdBaseType == TYP_FLOAT); + + // For 12 byte SIMD, we need to clear the upper 4 bytes: + // idx = CNS_INT int 0x03 + // tmp1 = * CNS_DLB float 0.0 + // /--* op1 simd16 + // +--* idx int + // +--* tmp1 simd16 + // op1 = * HWINTRINSIC simd16 T Insert + // ... + + // This is roughly the following managed code: + // op1 = AdvSimd.Insert(op1, 0x03, 0.0f); + // ... 
+ + idx = comp->gtNewIconNode(0x03, TYP_INT); + BlockRange().InsertAfter(op1, idx); + + tmp1 = comp->gtNewZeroConNode(TYP_FLOAT); + BlockRange().InsertAfter(idx, tmp1); + LowerNode(tmp1); + + op1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, idx, tmp1, NI_AdvSimd_Insert, simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp1, op1); + LowerNode(op1); + + idx = comp->gtNewIconNode(0x03, TYP_INT); + BlockRange().InsertAfter(op2, idx); + + tmp2 = comp->gtNewZeroConNode(TYP_FLOAT); + BlockRange().InsertAfter(idx, tmp2); + LowerNode(tmp2); + + op2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op2, idx, tmp2, NI_AdvSimd_Insert, simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp2, op2); + LowerNode(op2); + } + + // We will be constructing the following parts: + // ... + // /--* op1 simd16 + // +--* op2 simd16 + // tmp1 = * HWINTRINSIC simd16 T Multiply + // ... + + // This is roughly the following managed code: + // ... + // var tmp1 = AdvSimd.Multiply(op1, op2); + // ... + + NamedIntrinsic multiply = (simdBaseType == TYP_DOUBLE) ? NI_AdvSimd_Arm64_Multiply : NI_AdvSimd_Multiply; + assert(!varTypeIsLong(simdBaseType)); + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, op2, multiply, simdBaseJitType, simdSize); + BlockRange().InsertBefore(node, tmp1); + LowerNode(tmp1); + + if (varTypeIsFloating(simdBaseType)) + { + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // * STORE_LCL_VAR simd16 + // tmp1 = LCL_VAR simd16 + // tmp2 = LCL_VAR simd16 + // ... + + // This is roughly the following managed code: + // ... + // var tmp2 = tmp1; + // ... + + node->gtOp1 = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + ReplaceWithLclVar(tmp1Use); + tmp1 = node->gtOp1; + + tmp2 = comp->gtClone(tmp1); + BlockRange().InsertAfter(tmp1, tmp2); + + if (simdSize == 8) + { + assert(simdBaseType == TYP_FLOAT); + + // We will be constructing the following parts: + // ... 
+ // /--* tmp1 simd8 + // +--* tmp2 simd8 + // tmp1 = * HWINTRINSIC simd8 T AddPairwise + // ... + + // This is roughly the following managed code: + // ... + // var tmp1 = AdvSimd.AddPairwise(tmp1, tmp2); + // ... + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_AddPairwise, simdBaseJitType, + simdSize); + BlockRange().InsertAfter(tmp2, tmp1); + LowerNode(tmp1); + } + else + { + assert((simdSize == 12) || (simdSize == 16)); + + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // +--* tmp2 simd16 + // tmp2 = * HWINTRINSIC simd16 T AddPairwise + // ... + + // This is roughly the following managed code: + // ... + // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); + // ... + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, simdBaseJitType, + simdSize); + BlockRange().InsertAfter(tmp2, tmp1); + LowerNode(tmp1); + + if (simdBaseType == TYP_FLOAT) + { + // Float needs an additional pairwise add to finish summing the parts + // The first will have summed e0 with e1 and e2 with e3 and then repeats that for the upper half + // So, we will have a vector that looks like this: + // < e0 + e1, e2 + e3, e0 + e1, e2 + e3> + // Doing a second horizontal add with itself will then give us + // e0 + e1 + e2 + e3 in all elements of the vector + + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // * STORE_LCL_VAR simd16 + // tmp1 = LCL_VAR simd16 + // tmp2 = LCL_VAR simd16 + // /--* tmp1 simd16 + // +--* tmp2 simd16 + // tmp2 = * HWINTRINSIC simd16 T AddPairwise + // ... + + // This is roughly the following managed code: + // ... + // var tmp2 = tmp1; + // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); + // ... 
+ + node->gtOp1 = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + ReplaceWithLclVar(tmp1Use); + tmp1 = node->gtOp1; + + tmp2 = comp->gtClone(tmp1); + BlockRange().InsertAfter(tmp1, tmp2); + + tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, + simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp2, tmp1); + LowerNode(tmp1); + } + } + + tmp2 = tmp1; + } + else + { + assert(varTypeIsIntegral(simdBaseType)); + + // We will be constructing the following parts: + // ... + // /--* tmp1 simd16 + // tmp2 = * HWINTRINSIC simd16 T AddAcross + // ... + + // This is roughly the following managed code: + // ... + // var tmp2 = AdvSimd.Arm64.AddAcross(tmp1); + // ... + + tmp2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, simdSize); + BlockRange().InsertAfter(tmp1, tmp2); + LowerNode(tmp2); + } + + // We will be constructing the following parts: + // ... + // /--* tmp2 simd16 + // node = * HWINTRINSIC simd16 T ToScalar + + // This is roughly the following managed code: + // ... + // return tmp2.ToScalar(); + + node->gtOp1 = tmp2; + node->gtOp2 = nullptr; + + node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar; + LowerNode(node); + + return; +} + +#endif // FEATURE_HW_INTRINSICS + +//------------------------------------------------------------------------ +// Containment analysis +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// ContainCheckCallOperands: Determine whether operands of a call should be contained. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::ContainCheckCallOperands(GenTreeCall* call) +{ + // There are no contained operands for LOONGARCH. 
+} + +//------------------------------------------------------------------------ +// ContainCheckStoreIndir: determine whether the sources of a STOREIND node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) +{ +#if 0 +assert(!"unimplemented on LOONGARCH yet"); +#else + + GenTree* src = node->Data(); + if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) + { + // an integer zero for 'src' can be contained. + MakeSrcContained(node, src); + } + + ContainCheckIndir(node); + +#endif +} + +//------------------------------------------------------------------------ +// ContainCheckIndir: Determine whether operands of an indir should be contained. +// +// Arguments: +// indirNode - The indirection node of interest +// +// Notes: +// This is called for both store and load indirections. +// +// Return Value: +// None. +// +void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) +{ + // If this is the rhs of a block copy it will be handled when we handle the store. + if (indirNode->TypeGet() == TYP_STRUCT) + { + return; + } + +#ifdef FEATURE_SIMD + assert(!"unimplemented on LOONGARCH yet"); + // If indirTree is of TYP_SIMD12, don't mark addr as contained + // so that it always get computed to a register. This would + // mean codegen side logic doesn't need to handle all possible + // addr expressions that could be contained. + // + // TODO-LOONGARCH64-CQ: handle other addr mode expressions that could be marked + // as contained. + if (indirNode->TypeGet() == TYP_SIMD12) + { + return; + } +#endif // FEATURE_SIMD + + GenTree* addr = indirNode->Addr(); + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) + { + MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // These nodes go into an addr mode: + // - GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR is a stack addr mode. 
+ MakeSrcContained(indirNode, addr); + } + else if (addr->OperIs(GT_CLS_VAR_ADDR)) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. + // make this contained, it turns into a constant that goes into an addr mode + MakeSrcContained(indirNode, addr); + } + +} + +//------------------------------------------------------------------------ +// ContainCheckBinary: Determine whether a binary op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckBinary(GenTreeOp* node) +{ + // Check and make op2 contained (if it is a containable immediate) + CheckImmedAndMakeContained(node, node->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckMul: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckMul(GenTreeOp* node) +{ + ContainCheckBinary(node); +} + +//------------------------------------------------------------------------ +// ContainCheckDivOrMod: determine which operands of a div/mod should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckDivOrMod(GenTreeOp* node) +{ + assert(node->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); +} + +//------------------------------------------------------------------------ +// ContainCheckShiftRotate: Determine whether a mul op's operands should be contained. +// +// Arguments: +// node - the node we care about +// +void Lowering::ContainCheckShiftRotate(GenTreeOp* node) +{ + GenTree* shiftBy = node->gtOp2; + assert(node->OperIsShiftOrRotate()); + + if (shiftBy->IsCnsIntOrI()) + { + MakeSrcContained(node, shiftBy); + } +} + +//------------------------------------------------------------------------ +// ContainCheckStoreLoc: determine whether the source of a STORE_LCL* should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const +{ + assert(storeLoc->OperIsLocalStore()); + GenTree* op1 = storeLoc->gtGetOp1(); + + if (op1->OperIs(GT_BITCAST)) + { + // If we know that the source of the bitcast will be in a register, then we can make + // the bitcast itself contained. This will allow us to store directly from the other + // type if this node doesn't get a register. + GenTree* bitCastSrc = op1->gtGetOp1(); + if (!bitCastSrc->isContained() && !bitCastSrc->IsRegOptional()) + { + op1->SetContained(); + return; + } + } + + const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); + + + + +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(storeLoc)) + { + // If this is a store to memory, we can initialize a zero vector in memory from REG_ZR. + if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister) + { + // For an InitBlk we want op1 to be contained + MakeSrcContained(storeLoc, op1); + if (op1->IsSIMDZero()) + { + MakeSrcContained(op1, op1->gtGetOp1()); + } + } + return; + } +#endif // FEATURE_SIMD + if (IsContainableImmed(storeLoc, op1)) + { + MakeSrcContained(storeLoc, op1); + } + + // If the source is a containable immediate, make it contained, unless it is + // an int-size or larger store of zero to memory, because we can generate smaller code + // by zeroing a register and then storing it. + var_types type = varDsc->GetRegisterType(storeLoc); + if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(type))) + { + MakeSrcContained(storeLoc, op1); + } +} + +//------------------------------------------------------------------------ +// ContainCheckCast: determine whether the source of a CAST node should be contained. 
+// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCast(GenTreeCast* node) +{ +} + +//------------------------------------------------------------------------ +// ContainCheckCompare: determine whether the sources of a compare node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckCompare(GenTreeOp* cmp) +{ + CheckImmedAndMakeContained(cmp, cmp->gtOp2); +} + +//------------------------------------------------------------------------ +// ContainCheckBoundsChk: determine whether any source of a bounds check node should be contained. +// +// Arguments: +// node - pointer to the node +// +void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) +{ + assert(node->OperIsBoundsCheck()); + if (!CheckImmedAndMakeContained(node, node->GetIndex())) + { + CheckImmedAndMakeContained(node, node->GetArrayLength()); + } +} + +#ifdef FEATURE_SIMD +//---------------------------------------------------------------------------------------------- +// ContainCheckSIMD: Perform containment analysis for a SIMD intrinsic node. +// +// Arguments: +// simdNode - The SIMD intrinsic node. +// +void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + switch (simdNode->gtSIMDIntrinsicID) + { + GenTree* op1; + GenTree* op2; + + case SIMDIntrinsicInit: + op1 = simdNode->gtOp.gtOp1; + if (op1->IsIntegralConst(0)) + { + MakeSrcContained(simdNode, op1); + } + break; + + case SIMDIntrinsicInitArray: + // We have an array and an index, which may be contained. + CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); + break; + + case SIMDIntrinsicOpEquality: + case SIMDIntrinsicOpInEquality: + // TODO-LOONGARCH64-CQ Support containing 0 + break; + + case SIMDIntrinsicGetItem: + { + // This implements get_Item method. The sources are: + // - the source SIMD struct + // - index (which element to get) + // The result is baseType of SIMD struct. 
+ op1 = simdNode->gtOp.gtOp1; + op2 = simdNode->gtOp.gtOp2; + + // If the index is a constant, mark it as contained. + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(simdNode, op2); + } + + if (IsContainableMemoryOp(op1)) + { + MakeSrcContained(simdNode, op1); + if (op1->OperGet() == GT_IND) + { + op1->AsIndir()->Addr()->ClearContained(); + } + } + break; + } + + default: + break; + } +#endif +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +//---------------------------------------------------------------------------------------------- +// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node. +// +// Arguments: +// node - The hardware intrinsic node. +// +void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + GenTreeArgList* argList = nullptr; + GenTree* op1 = node->gtOp.gtOp1; + GenTree* op2 = node->gtOp.gtOp2; + + if (op1->OperIs(GT_LIST)) + { + argList = op1->AsArgList(); + op1 = argList->Current(); + op2 = argList->Rest()->Current(); + } + + switch (HWIntrinsicInfo::lookup(node->gtHWIntrinsicId).form) + { + case HWIntrinsicInfo::SimdExtractOp: + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(node, op2); + } + break; + + case HWIntrinsicInfo::SimdInsertOp: + if (op2->IsCnsIntOrI()) + { + MakeSrcContained(node, op2); + +#if 0 + // This is currently not supported downstream. The following (at least) need to be modifed: + // GenTree::isContainableHWIntrinsic() needs to handle this. 
+ // CodeGen::genConsumRegs() + // + GenTree* op3 = argList->Rest()->Rest()->Current(); + + // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov + // VX[a] = VY[b] + // In C# this would naturally be expressed by + // Insert(VX, a, Extract(VY, b)) + // If both a & b are immediate constants contain the extract/getItem so that we can emit + // the single instruction mov Vx[a], Vy[b] + if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_LOONGARCH64_SIMD_GetItem)) + { + ContainCheckHWIntrinsic(op3->AsHWIntrinsic()); + + if (op3->gtOp.gtOp2->isContained()) + { + MakeSrcContained(node, op3); + } + } +#endif + } + break; + + default: + break; + } +#endif +} +#endif // FEATURE_HW_INTRINSICS + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9308836ff78fc..b43d29ed7cfc3 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -702,6 +702,8 @@ LinearScan::LinearScan(Compiler* theCompiler) enregisterLocalVars = compiler->compEnregLocals(); #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); +#elif TARGET_LOONGARCH64 + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif @@ -1570,7 +1572,11 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, // but if the variable is tracked the prolog generator would expect it to be in liveIn set, // so an assert in `genFnProlog` will fire. 
+#ifdef TARGET_LOONGARCH64 + return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); +#else return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); +#endif case TYP_UNDEF: case TYP_UNKNOWN: @@ -2571,7 +2577,7 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Determine whether we need to reserve a register for large lclVar offsets. if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT)) { @@ -2581,7 +2587,7 @@ void LinearScan::setFrameType() JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); removeMask |= RBM_OPT_RSVD; } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0)) { @@ -2647,11 +2653,16 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* assert(refPosition->getInterval() == currentInterval); RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; - +#ifdef TARGET_LOONGARCH64 + if ((candidates & allRegs(regType)) != RBM_NONE) + return regType; + else + return TYP_I_IMPL; +#else assert((candidates & allRegs(regType)) != RBM_NONE); return regType; +#endif } - //------------------------------------------------------------------------ // isMatchingConstant: Check to see whether a given register contains the constant referenced // by the given RefPosition @@ -7673,7 +7684,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Next, if this blocks ends with a JCMP, we have to make sure: // 1. Not to copy into the register that JCMP uses // e.g. 
JCMP w21, BRANCH @@ -7786,7 +7797,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) sameToReg = REG_NA; } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex)) { sameToReg = REG_NA; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 1b549424f0fc4..5e803336e2b4d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -762,6 +762,9 @@ class LinearScan : public LinearScanInterface #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +#elif defined(TARGET_LOONGARCH64) + static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); + static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -2215,7 +2218,12 @@ class RefPosition // The max bits needed is based on max value of MAX_RET_REG_COUNT value // across all targets and that happens 4 on on Arm. Hence index value // would be 0..MAX_RET_REG_COUNT-1. +#ifdef TARGET_LOONGARCH64 + //TODO for LOONGARCH64: should confirm for ArgSplit? 
+ unsigned char multiRegIdx : 3; +#else // !TARGET_LOONGARCH64 unsigned char multiRegIdx : 2; +#endif // !TARGET_LOONGARCH64 // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index fb5747079fa90..e139d877e1322 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -593,7 +593,9 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); +#ifndef TARGET_LOONGARCH64 assert((allRegs(theInterval->registerType) & mask) != 0); +#endif } RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); @@ -3927,6 +3929,13 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) addrCandidates = RBM_WRITE_BARRIER_DST; srcCandidates = RBM_WRITE_BARRIER_SRC; +#elif defined(TARGET_LOONGARCH64) + // the 'addr' goes into (REG_WRITE_BARRIER_DST) + // the 'src' goes into (REG_WRITE_BARRIER_SRC) + // + addrCandidates = RBM_WRITE_BARRIER_DST; + srcCandidates = RBM_WRITE_BARRIER_SRC; + #elif defined(TARGET_X86) && NOGC_WRITE_BARRIERS bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp new file mode 100644 index 0000000000000..8ce30f7bb6855 --- /dev/null +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -0,0 +1,1731 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Requirements for LOONGARCH64 XX +XX XX +XX This encapsulates all the logic for setting register requirements for XX +XX the LOONGARCH64 architecture. XX +XX XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifdef TARGET_LOONGARCH64 + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" + +//------------------------------------------------------------------------ +// BuildNode: Build the RefPositions for for a node +// +// Arguments: +// treeNode - the node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Preconditions: +// LSRA Has been initialized. +// +// Postconditions: +// RefPositions have been built for all the register defs and uses required +// for this node. +// +int LinearScan::BuildNode(GenTree* tree) +{ + assert(!tree->isContained()); + int srcCount; + int dstCount = 0; + regMaskTP dstCandidates = RBM_NONE; + regMaskTP killMask = RBM_NONE; + bool isLocalDefUse = false; + + // Reset the build-related members of LinearScan. + clearBuildState(); + + // Set the default dstCount. This may be modified below. + if (tree->IsValue()) + { + dstCount = 1; + if (tree->IsUnusedValue()) + { + isLocalDefUse = true; + } + } + else + { + dstCount = 0; + } + + switch (tree->OperGet()) + { + default: + srcCount = BuildSimple(tree); + break; + + case GT_LCL_VAR: + // We make a final determination about whether a GT_LCL_VAR is a candidate or contained + // after liveness. In either case we don't build any uses or defs. 
Otherwise, this is a + // load of a stack-based local into a register and we'll fall through to the general + // local case below. + if (checkContainedOrCandidateLclVar(tree->AsLclVar())) + { + return 0; + } + FALLTHROUGH; + case GT_LCL_FLD: + { + srcCount = 0; +#ifdef FEATURE_SIMD + // Need an additional register to read upper 4 bytes of Vector3. + if (tree->TypeGet() == TYP_SIMD12) + { + // We need an internal register different from targetReg in which 'tree' produces its result + // because both targetReg and internal reg will be in use at the same time. + buildInternalFloatRegisterDefForNode(tree, allSIMDRegs()); + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + } +#endif + BuildDef(tree); + } + break; + + case GT_STORE_LCL_VAR: + if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) + { + dstCount = compiler->lvaGetDesc(tree->AsLclVar()->GetLclNum())->lvFieldCnt; + } + FALLTHROUGH; + + case GT_STORE_LCL_FLD: + srcCount = BuildStoreLoc(tree->AsLclVarCommon()); + break; + + case GT_FIELD_LIST: + // These should always be contained. We don't correctly allocate or + // generate code for a non-contained GT_FIELD_LIST. + noway_assert(!"Non-contained GT_FIELD_LIST"); + srcCount = 0; + break; + + case GT_ARGPLACE: + case GT_NO_OP: + case GT_START_NONGC: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_PROF_HOOK: + srcCount = 0; + assert(dstCount == 0); + killMask = getKillSetForProfilerHook(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_START_PREEMPTGC: + // This kills GC refs in callee save regs + srcCount = 0; + assert(dstCount == 0); + BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + break; + + case GT_CNS_DBL: + { + GenTreeDblCon* dblConst = tree->AsDblCon(); + double constValue = dblConst->AsDblCon()->gtDconVal; + + if ((constValue == (double)(int)constValue) && (-2048 <= constValue) && (constValue <= 2047)) + { + // Directly encode constant to instructions. 
+ } + else + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + } + FALLTHROUGH; + + case GT_CNS_INT: + { + srcCount = 0; + assert(dstCount == 1); + RefPosition* def = BuildDef(tree); + def->getInterval()->isConstant = true; + } + break; + + case GT_BOX: + case GT_COMMA: + case GT_QMARK: + case GT_COLON: + srcCount = 0; + assert(dstCount == 0); + unreached(); + break; + + case GT_RETURN: + srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + case GT_RETFILT: + assert(dstCount == 0); + if (tree->TypeGet() == TYP_VOID) + { + srcCount = 0; + } + else + { + assert(tree->TypeGet() == TYP_INT); + srcCount = 1; + BuildUse(tree->gtGetOp1(), RBM_INTRET); + } + break; + + case GT_NOP: + // A GT_NOP is either a passthrough (if it is void, or if it has + // a child), but must be considered to produce a dummy value if it + // has a type but no child. + srcCount = 0; + if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr) + { + assert(dstCount == 1); + BuildDef(tree); + } + else + { + assert(dstCount == 0); + } + break; + + case GT_KEEPALIVE: + assert(dstCount == 0); + srcCount = BuildOperandUses(tree->gtGetOp1()); + break; + + case GT_JTRUE: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_JMP: + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_SWITCH: + // This should never occur since switch nodes must not be visible at this + // point in the JIT. 
+ srcCount = 0; + noway_assert(!"Switch must be lowered at this point"); + break; + + case GT_JMPTABLE: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_SWITCH_TABLE: + buildInternalIntRegisterDefForNode(tree); + srcCount = BuildBinaryUses(tree->AsOp()); + assert(dstCount == 0); + break; + + case GT_ASG: + noway_assert(!"We should never hit any assignment operator in lowering"); + srcCount = 0; + break; + + case GT_ADD: + case GT_SUB: + if (varTypeIsFloating(tree->TypeGet())) + { + // overflow operations aren't supported on float/double types. + assert(!tree->gtOverflow()); + + // No implicit conversions at this stage as the expectation is that + // everything is made explicit by adding casts. + assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); + } + + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + + case GT_AND: + case GT_OR: + case GT_XOR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + case GT_ROR: + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_RETURNTRAP: + // this just turns into a compare of its child with an int + // + a conditional call + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 0); + killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + break; + + //case GT_MOD: + //case GT_UMOD: + // NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in LOONGARCH64"); + // assert(!"Shouldn't see an integer typed GT_MOD node in LOONGARCH64"); + // srcCount = 0; + // break; + + case GT_MUL: + case GT_MOD: + case GT_UMOD: + case GT_DIV: + case GT_MULHI: + case GT_UDIV: + { + if (emitActualTypeSize(tree) == EA_4BYTE) + { + // We need two registers: tmpRegOp1 and tmpRegOp2 + 
buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalRegisterUses(); + assert(dstCount == 1); + BuildDef(tree); + } + break; + + case GT_INTRINSIC: + { + noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); + + // Both operand and its result must be of the same floating point type. + GenTree* op1 = tree->gtGetOp1(); + assert(varTypeIsFloating(op1)); + assert(op1->TypeGet() == tree->TypeGet()); + + BuildUse(op1); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + } + break; + +#ifdef FEATURE_SIMD + case GT_SIMD: + srcCount = BuildSIMD(tree->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + + case GT_CAST: + assert(dstCount == 1); + srcCount = BuildCast(tree->AsCast()); + break; + + case GT_NEG: + case GT_NOT: + BuildUse(tree->gtGetOp1()); + srcCount = 1; + assert(dstCount == 1); + BuildDef(tree); + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_JCMP: + if (!varTypeIsFloating(tree->gtGetOp1())) + { + // We need two registers: tmpRegOp1 and tmpRegOp2 + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } + srcCount = BuildCmp(tree); + break; + + case GT_CKFINITE: + srcCount = 1; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + BuildUse(tree->gtGetOp1()); + BuildDef(tree); + buildInternalRegisterUses(); + break; + + case GT_CMPXCHG: + { + GenTreeCmpXchg* cmpXchgNode = 
tree->AsCmpXchg();
+            srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
+            assert(dstCount == 1);
+
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+            {
+                // For LOONGARCH exclusives requires a single internal register
+                buildInternalIntRegisterDefForNode(tree);
+            }
+
+            // For LOONGARCH exclusives the lifetime of the addr and data must be extended because
+            // it may be used multiple times during retries
+
+            // For LOONGARCH atomic cas the lifetime of the addr and data must be extended to prevent
+            // them being reused as the target register which must be destroyed early
+
+            RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->gtOpLocation);
+            setDelayFree(locationUse);
+            RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->gtOpValue);
+            setDelayFree(valueUse);
+            if (!cmpXchgNode->gtOpComparand->isContained())
+            {
+                RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->gtOpComparand);
+
+                // For LOONGARCH exclusives the lifetime of the comparand must be extended because
+                // it may be used multiple times during retries
+                if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+                {
+                    setDelayFree(comparandUse);
+                }
+            }
+
+            // Internals may not collide with target
+            setInternalRegsDelayFree = true;
+            buildInternalRegisterUses();
+            BuildDef(tree);
+        }
+        break;
+
+        case GT_LOCKADD:
+        case GT_XORR:
+        case GT_XAND:
+        case GT_XADD:
+        case GT_XCHG:
+        {
+            assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
+            srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;
+
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+            {
+                // GT_XCHG requires a single internal register; the others require two.
+                buildInternalIntRegisterDefForNode(tree);
+                if (tree->OperGet() != GT_XCHG)
+                {
+                    buildInternalIntRegisterDefForNode(tree);
+                }
+            }
+            else if (tree->OperIs(GT_XAND))
+            {
+                // for ldclral we need an internal register.
+                buildInternalIntRegisterDefForNode(tree);
+            }
+
+            assert(!tree->gtGetOp1()->isContained());
+            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
+            RefPosition* op2Use = nullptr;
+            if (!tree->gtGetOp2()->isContained())
+            {
+                op2Use = BuildUse(tree->gtGetOp2());
+            }
+
+            // For LOONGARCH exclusives the lifetime of the addr and data must be extended because
+            // it may be used multiple times during retries
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+            {
+                // Internals may not collide with target
+                if (dstCount == 1)
+                {
+                    setDelayFree(op1Use);
+                    if (op2Use != nullptr)
+                    {
+                        setDelayFree(op2Use);
+                    }
+                    setInternalRegsDelayFree = true;
+                }
+                buildInternalRegisterUses();
+            }
+            if (dstCount == 1)
+            {
+                BuildDef(tree);
+            }
+        }
+        break;
+
+#if FEATURE_ARG_SPLIT
+        case GT_PUTARG_SPLIT:
+            srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
+            dstCount = tree->AsPutArgSplit()->gtNumRegs;
+            break;
+#endif // FEATURE_ARG_SPLIT
+
+        case GT_PUTARG_STK:
+            srcCount = BuildPutArgStk(tree->AsPutArgStk());
+            break;
+
+        case GT_PUTARG_REG:
+            srcCount = BuildPutArgReg(tree->AsUnOp());
+            break;
+
+        case GT_CALL:
+            srcCount = BuildCall(tree->AsCall());
+            if (tree->AsCall()->HasMultiRegRetVal())
+            {
+                dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+            }
+            break;
+
+        case GT_ADDR:
+        {
+            // For a GT_ADDR, the child node should not be evaluated into a register
+            GenTree* child = tree->gtGetOp1();
+            assert(!isCandidateLocalRef(child));
+            assert(child->isContained());
+            assert(dstCount == 1);
+            srcCount = 0;
+            BuildDef(tree);
+        }
+        break;
+
+        case GT_BLK:
+        case GT_DYN_BLK:
+            // These should all be eliminated prior to Lowering.
+            assert(!"Non-store block node in Lowering");
+            srcCount = 0;
+            break;
+
+        case GT_STORE_BLK:
+        case GT_STORE_OBJ:
+        case GT_STORE_DYN_BLK:
+            srcCount = BuildBlockStore(tree->AsBlk());
+            break;
+
+        case GT_INIT_VAL:
+            // Always a passthrough of its child's value.
+ assert(!"INIT_VAL should always be contained"); + srcCount = 0; + break; + + case GT_LCLHEAP: + { + assert(dstCount == 1); + + // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): + // Here '-' means don't care. + // + // Size? Init Memory? # temp regs + // 0 - 0 + // const and <=6 ptr words - 0 + // const and 6 ptr words Yes 0 + // Non-const Yes 0 + // Non-const No 2 + // + + GenTree* size = tree->gtGetOp1(); + if (size->IsCnsIntOrI()) + { + assert(size->isContained()); + srcCount = 0; + + size_t sizeVal = size->AsIntCon()->gtIconVal; + + if (sizeVal != 0) + { + // Compute the amount of memory to properly STACK_ALIGN. + // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. + // This should also help in debugging as we can examine the original size specified with + // localloc. + sizeVal = AlignUp(sizeVal, STACK_ALIGN); + size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); + + // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc) + // + if (stpCount <= 4) + { + // Need no internal registers + } + else if (!compiler->info.compInitMem) + { + // No need to initialize allocated stack space. 
+ if (sizeVal < compiler->eeGetPageSize()) + { + // Need no internal registers + } + else + { + // We need two registers: regCnt and RegTmp + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + } + } + else + { + srcCount = 1; + if (!compiler->info.compInitMem) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + } + } + + if (!size->isContained()) + { + BuildUse(size); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_BOUNDS_CHECK: +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + { + GenTreeBoundsChk* node = tree->AsBoundsChk(); + // Consumes arrLen & index - has no result + assert(dstCount == 0); + srcCount = BuildOperandUses(node->GetIndex()); + srcCount += BuildOperandUses(node->GetArrayLength()); + } + break; + + case GT_ARR_ELEM: + // These must have been lowered to GT_ARR_INDEX + noway_assert(!"We should never see a GT_ARR_ELEM in lowering"); + srcCount = 0; + assert(dstCount == 0); + break; + + case GT_ARR_INDEX: + { + srcCount = 2; + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + + // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple + // times while the result is being computed. + RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj()); + setDelayFree(arrObjUse); + BuildUse(tree->AsArrIndex()->IndexExpr()); + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_ARR_OFFSET: + // This consumes the offset, if any, the arrObj and the effective index, + // and produces the flattened offset for this dimension. 
+ srcCount = 2; + if (!tree->AsArrOffs()->gtOffset->isContained()) + { + BuildUse(tree->AsArrOffs()->gtOffset); + srcCount++; + } + BuildUse(tree->AsArrOffs()->gtIndex); + BuildUse(tree->AsArrOffs()->gtArrObj); + assert(dstCount == 1); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_LEA: + { + GenTreeAddrMode* lea = tree->AsAddrMode(); + + GenTree* base = lea->Base(); + GenTree* index = lea->Index(); + int cns = lea->Offset(); + + // This LEA is instantiating an address, so we set up the srcCount here. + srcCount = 0; + if (base != nullptr) + { + srcCount++; + BuildUse(base); + } + if (index != nullptr) + { + srcCount++; + BuildUse(index); + } + assert(dstCount == 1); + + // On LOONGARCH64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH64 does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + else if (!((-2048 <= cns) && (cns <= 2047))) + { + // This offset can't be contained in the add instruction, so we need an internal register + buildInternalIntRegisterDefForNode(tree); + } + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_STOREIND: + { + assert(dstCount == 0); + + if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree)) + { + srcCount = BuildGCWriteBarrier(tree); + break; + } + + srcCount = BuildIndir(tree->AsIndir()); + if (!tree->gtGetOp2()->isContained()) + { + BuildUse(tree->gtGetOp2()); + srcCount++; + } + } + break; + + case GT_NULLCHECK: + case GT_IND: + assert(dstCount == (tree->OperIs(GT_NULLCHECK) ? 
0 : 1)); + srcCount = BuildIndir(tree->AsIndir()); + break; + + case GT_CATCH_ARG: + srcCount = 0; + assert(dstCount == 1); + BuildDef(tree, RBM_EXCEPTION_OBJECT); + break; + + case GT_CLS_VAR: + srcCount = 0; + // GT_CLS_VAR, by the time we reach the backend, must always + // be a pure use. + // It will produce a result of the type of the + // node, and use an internal register for the address. + + assert(dstCount == 1); + assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + case GT_INDEX_ADDR: + assert(dstCount == 1); + srcCount = BuildBinaryUses(tree->AsOp()); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + BuildDef(tree); + break; + + } // end switch (tree->OperGet()) + + if (tree->IsUnusedValue() && (dstCount != 0)) + { + isLocalDefUse = true; + } + // We need to be sure that we've set srcCount and dstCount appropriately + assert((dstCount < 2) || tree->IsMultiRegNode()); + assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue())); + assert(!tree->IsUnusedValue() || (dstCount != 0)); + assert(dstCount == tree->GetRegisterDstCount(compiler)); + return srcCount; +} + +#ifdef FEATURE_SIMD +//------------------------------------------------------------------------ +// BuildSIMD: Set the NodeInfo for a GT_SIMD tree. +// +// Arguments: +// tree - The GT_SIMD node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + int srcCount = 0; + // Only SIMDIntrinsicInit can be contained + if (simdTree->isContained()) + { + assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); + } + int dstCount = simdTree->IsValue() ? 
1 : 0; + assert(dstCount == 1); + + bool buildUses = true; + + GenTree* op1 = simdTree->gtGetOp1(); + GenTree* op2 = simdTree->gtGetOp2(); + + switch (simdTree->gtSIMDIntrinsicID) + { + case SIMDIntrinsicInit: + case SIMDIntrinsicCast: + case SIMDIntrinsicSqrt: + case SIMDIntrinsicAbs: + case SIMDIntrinsicConvertToSingle: + case SIMDIntrinsicConvertToInt32: + case SIMDIntrinsicConvertToDouble: + case SIMDIntrinsicConvertToInt64: + case SIMDIntrinsicWidenLo: + case SIMDIntrinsicWidenHi: + // No special handling required. + break; + + case SIMDIntrinsicGetItem: + { + op1 = simdTree->gtGetOp1(); + op2 = simdTree->gtGetOp2(); + + // We have an object and an index, either of which may be contained. + bool setOp2DelayFree = false; + if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) + { + // If the index is not a constant and the object is not contained or is a local + // we will need a general purpose register to calculate the address + // internal register must not clobber input index + // TODO-Cleanup: An internal register will never clobber a source; this code actually + // ensures that the index (op2) doesn't interfere with the target. + buildInternalIntRegisterDefForNode(simdTree); + setOp2DelayFree = true; + } + srcCount += BuildOperandUses(op1); + if (!op2->isContained()) + { + RefPosition* op2Use = BuildUse(op2); + if (setOp2DelayFree) + { + setDelayFree(op2Use); + } + srcCount++; + } + + if (!op2->IsCnsIntOrI() && (!op1->isContained())) + { + // If vector is not already in memory (contained) and the index is not a constant, + // we will use the SIMD temp location to store the vector. 
+ compiler->getSIMDInitTempVarNum(); + } + buildUses = false; + } + break; + + case SIMDIntrinsicAdd: + case SIMDIntrinsicSub: + case SIMDIntrinsicMul: + case SIMDIntrinsicDiv: + case SIMDIntrinsicBitwiseAnd: + case SIMDIntrinsicBitwiseAndNot: + case SIMDIntrinsicBitwiseOr: + case SIMDIntrinsicBitwiseXor: + case SIMDIntrinsicMin: + case SIMDIntrinsicMax: + case SIMDIntrinsicEqual: + case SIMDIntrinsicLessThan: + case SIMDIntrinsicGreaterThan: + case SIMDIntrinsicLessThanOrEqual: + case SIMDIntrinsicGreaterThanOrEqual: + // No special handling required. + break; + + case SIMDIntrinsicSetX: + case SIMDIntrinsicSetY: + case SIMDIntrinsicSetZ: + case SIMDIntrinsicSetW: + case SIMDIntrinsicNarrow: + { + // Op1 will write to dst before Op2 is free + BuildUse(op1); + RefPosition* op2Use = BuildUse(op2); + setDelayFree(op2Use); + srcCount = 2; + buildUses = false; + break; + } + + case SIMDIntrinsicInitN: + { + var_types baseType = simdTree->gtSIMDBaseType; + srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); + if (varTypeIsFloating(simdTree->gtSIMDBaseType)) + { + // Need an internal register to stitch together all the values into a single vector in a SIMD reg. + buildInternalFloatRegisterDefForNode(simdTree); + } + + for (GenTree* operand : simdTree->Operands()) + { + assert(operand->TypeIs(baseType)); + assert(!operand->isContained()); + + BuildUse(operand); + } + + buildUses = false; + break; + } + + case SIMDIntrinsicInitArray: + // We have an array and an index, which may be contained. + break; + + case SIMDIntrinsicOpEquality: + case SIMDIntrinsicOpInEquality: + buildInternalFloatRegisterDefForNode(simdTree); + break; + + case SIMDIntrinsicDotProduct: + buildInternalFloatRegisterDefForNode(simdTree); + break; + + case SIMDIntrinsicSelect: + // TODO-LOONGARCH64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB + // bsl target register must be VC. Reserve a temp in case we need to shuffle things. 
+ // This will require a different approach, as GenTreeSIMD has only two operands. + assert(!"SIMDIntrinsicSelect not yet supported"); + buildInternalFloatRegisterDefForNode(simdTree); + break; + + case SIMDIntrinsicInitArrayX: + case SIMDIntrinsicInitFixed: + case SIMDIntrinsicCopyToArray: + case SIMDIntrinsicCopyToArrayX: + case SIMDIntrinsicNone: + case SIMDIntrinsicGetCount: + case SIMDIntrinsicGetOne: + case SIMDIntrinsicGetZero: + case SIMDIntrinsicGetAllOnes: + case SIMDIntrinsicGetX: + case SIMDIntrinsicGetY: + case SIMDIntrinsicGetZ: + case SIMDIntrinsicGetW: + case SIMDIntrinsicInstEquals: + case SIMDIntrinsicHWAccel: + case SIMDIntrinsicWiden: + case SIMDIntrinsicInvalid: + assert(!"These intrinsics should not be seen during register allocation"); + __fallthrough; + + default: + noway_assert(!"Unimplemented SIMD node type."); + unreached(); + } + if (buildUses) + { + assert(!op1->OperIs(GT_LIST)); + assert(srcCount == 0); + srcCount = BuildOperandUses(op1); + if ((op2 != nullptr) && !op2->isContained()) + { + srcCount += BuildOperandUses(op2); + } + } + assert(internalCount <= MaxInternalCount); + buildInternalRegisterUses(); + if (dstCount == 1) + { + BuildDef(simdTree); + } + else + { + assert(dstCount == 0); + } + return srcCount; +#endif +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +#include "hwintrinsic.h" +//------------------------------------------------------------------------ +// BuildHWIntrinsic: Set the NodeInfo for a GT_HWINTRINSIC tree. +// +// Arguments: +// tree - The GT_HWINTRINSIC node of interest +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) +{ +assert(!"unimplemented on LOONGARCH yet"); +#if 0 + NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; + int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); + + GenTree* op1 = intrinsicTree->gtGetOp1(); + GenTree* op2 = intrinsicTree->gtGetOp2(); + GenTree* op3 = nullptr; + int srcCount = 0; + + if ((op1 != nullptr) && op1->OperIsList()) + { + // op2 must be null, and there must be at least two more arguments. + assert(op2 == nullptr); + noway_assert(op1->AsArgList()->Rest() != nullptr); + noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr); + assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr); + op2 = op1->AsArgList()->Rest()->Current(); + op3 = op1->AsArgList()->Rest()->Rest()->Current(); + op1 = op1->AsArgList()->Current(); + } + + bool op2IsDelayFree = false; + bool op3IsDelayFree = false; + + // Create internal temps, and handle any other special requirements. + switch (HWIntrinsicInfo::lookup(intrinsicID).form) + { + case HWIntrinsicInfo::Sha1HashOp: + assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); + if (!op2->isContained()) + { + assert(!op3->isContained()); + op2IsDelayFree = true; + op3IsDelayFree = true; + setInternalRegsDelayFree = true; + } + buildInternalFloatRegisterDefForNode(intrinsicTree); + break; + case HWIntrinsicInfo::SimdTernaryRMWOp: + assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); + if (!op2->isContained()) + { + assert(!op3->isContained()); + op2IsDelayFree = true; + op3IsDelayFree = true; + } + break; + case HWIntrinsicInfo::Sha1RotateOp: + buildInternalFloatRegisterDefForNode(intrinsicTree); + break; + + case HWIntrinsicInfo::SimdExtractOp: + case HWIntrinsicInfo::SimdInsertOp: + if (!op2->isContained()) + { + // We need a temp to create a switch table + buildInternalIntRegisterDefForNode(intrinsicTree); + } + break; + + default: + break; + } + + // Next, build uses + if (numArgs > 3) + { + 
srcCount = 0; + assert(!op2IsDelayFree && !op3IsDelayFree); + assert(op1->OperIs(GT_LIST)); + { + for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) + { + srcCount += BuildOperandUses(list->Current()); + } + } + assert(srcCount == numArgs); + } + else + { + if (op1 != nullptr) + { + srcCount += BuildOperandUses(op1); + if (op2 != nullptr) + { + srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2); + if (op3 != nullptr) + { + srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); + } + } + } + } + buildInternalRegisterUses(); + + // Now defs + if (intrinsicTree->IsValue()) + { + BuildDef(intrinsicTree); + } + + return srcCount; +#endif +} +#endif + +//------------------------------------------------------------------------ +// BuildIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND or block gentree node +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildIndir(GenTreeIndir* indirTree) +{ + // struct typed indirs are expected only on rhs of a block copy, + // but in this case they must be contained. 
+ assert(indirTree->TypeGet() != TYP_STRUCT); + + GenTree* addr = indirTree->Addr(); + GenTree* index = nullptr; + int cns = 0; + + if (addr->isContained()) + { + if (addr->OperGet() == GT_LEA) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + index = lea->Index(); + cns = lea->Offset(); + + // On LOONGARCH we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // LOONGARCH does not support both Index and offset so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + else if (!((-2048 <= cns) && (cns <= 2047))) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + buildInternalIntRegisterDefForNode(indirTree); + } + } + } + +#ifdef FEATURE_SIMD + if (indirTree->TypeGet() == TYP_SIMD12) + { + // If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir(). + assert(!addr->isContained()); + + // Vector3 is read/written as two reads/writes: 8 byte and 4 byte. + // To assemble the vector properly we would need an additional int register + buildInternalIntRegisterDefForNode(indirTree); + } +#endif // FEATURE_SIMD + + int srcCount = BuildIndirUses(indirTree); + buildInternalRegisterUses(); + + if (!indirTree->OperIs(GT_STOREIND, GT_NULLCHECK)) + { + BuildDef(indirTree); + } + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildCall(GenTreeCall* call) +{ + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; + + int srcCount = 0; + int dstCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + dstCount = 1; + } + } + + GenTree* ctrlExpr = call->gtControlExpr; + regMaskTP ctrlExprCandidates = RBM_NONE; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. + if (ctrlExpr != nullptr) + { + // we should never see a gtControlExpr whose type is void. + assert(ctrlExpr->TypeGet() != TYP_VOID); + + // In case of fast tail implemented as jmp, make sure that gtControlExpr is + // computed into a register. + if (call->IsFastTailCall()) + { + // Fast tail call - make sure that call target is always computed in T9(LOONGARCH64) + // so that epilog sequence can generate "jr t9" to achieve fast tail call. + ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; + } + } + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + buildInternalIntRegisterDefForNode(call); + } + + RegisterType registerType = call->TypeGet(); + +// Set destination candidates for return value of the call. 
+ + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + dstCandidates = retTypeDesc->GetABIReturnRegs(); + } + else if (varTypeUsesFloatArgReg(registerType)) + { + dstCandidates = RBM_FLOATRET; + } + else if (registerType == TYP_LONG) + { + dstCandidates = RBM_LNGRET; + } + else + { + dstCandidates = RBM_INTRET; + } + + // First, count reg args + // Each register argument corresponds to one source. + bool callHasFloatRegArgs = false; + + for (GenTreeCall::Use& arg : call->LateArgs()) + { + GenTree* argNode = arg.GetNode(); + +#ifdef DEBUG + // During Build, we only use the ArgTabEntry for validation, + // as getting it is rather expensive. + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + regNumber argReg = curArgTabEntry->GetRegNum(); + assert(curArgTabEntry != nullptr); +#endif + + if (argNode->gtOper == GT_PUTARG_STK) + { + // late arg that is not passed in a register + assert(curArgTabEntry->GetRegNum() == REG_STK); + // These should never be contained. 
+ assert(!argNode->isContained()); + continue; + } + + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (argNode->OperGet() == GT_FIELD_LIST) + { + assert(argNode->isContained()); + + // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) + for (GenTreeFieldList::Use& use : argNode->AsFieldList()->Uses()) + { +#ifdef DEBUG + assert(use.GetNode()->OperIs(GT_PUTARG_REG)); +#endif + BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); + srcCount++; + } + } +#if FEATURE_ARG_SPLIT + else if (argNode->OperGet() == GT_PUTARG_SPLIT) + { + unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; + assert(regCount == curArgTabEntry->numRegs); + for (unsigned int i = 0; i < regCount; i++) + { + BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i); + } + srcCount += regCount; + } +#endif // FEATURE_ARG_SPLIT + else + { + assert(argNode->OperIs(GT_PUTARG_REG)); + assert(argNode->GetRegNum() == argReg); + HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs); + { + BuildUse(argNode, genRegMask(argNode->GetRegNum())); + srcCount++; + } + } + } + + // Now, count stack args + // Note that these need to be computed into a register, but then + // they're just stored to the stack - so the reg doesn't + // need to remain live until the call. In fact, it must not + // because the code generator doesn't actually consider it live, + // so it can't be spilled. + + for (GenTreeCall::Use& use : call->Args()) + { + GenTree* arg = use.GetNode(); + + // Skip arguments that have been moved to the Late Arg list + if ((arg->gtFlags & GTF_LATE_ARG) == 0) + { +#ifdef DEBUG + fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry != nullptr); +#endif +#if FEATURE_ARG_SPLIT + // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they + // define registers used by the call. 
+ assert(arg->OperGet() != GT_PUTARG_SPLIT); +#endif // FEATURE_ARG_SPLIT + if (arg->gtOper == GT_PUTARG_STK) + { + assert(curArgTabEntry->GetRegNum() == REG_STK); + } + else + { + assert(!arg->IsValue() || arg->IsUnusedValue()); + } + } + } + + // If it is a fast tail call, it is already preferenced to use IP0. + // Therefore, no need set src candidates on call tgt again. + if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) + { + // Don't assign the call target to any of the argument registers because + // we will use them to also pass floating point arguments as required + // by LOONGARCH64 ABI. + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + } + + if (ctrlExpr != nullptr) + { + BuildUse(ctrlExpr, ctrlExprCandidates); + srcCount++; + } + + buildInternalRegisterUses(); + + // Now generate defs and kills. + regMaskTP killMask = getKillSetForCall(call); + BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node +// +// Arguments: +// argNode - a GT_PUTARG_STK node +// +// Return Value: +// The number of sources consumed by this node. 
+// +// Notes: +// Set the child node(s) to be contained when we have a multireg arg +// +int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) +{ + assert(argNode->gtOper == GT_PUTARG_STK); + + GenTree* putArgChild = argNode->gtGetOp1(); + + int srcCount = 0; + + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST)) + { + // We will use store instructions that each write a register sized value + + if (putArgChild->OperIs(GT_FIELD_LIST)) + { + assert(putArgChild->isContained()); + // We consume all of the items in the GT_FIELD_LIST + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + BuildUse(use.GetNode()); + srcCount++; + } + } + else + { + // We can use a ldp/stp sequence so we need two internal registers for LOONGARCH64; one for ARM. + buildInternalIntRegisterDefForNode(argNode); + + if (putArgChild->OperGet() == GT_OBJ) + { + assert(putArgChild->isContained()); + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation, and there are no source registers. + // + assert(objChild->isContained()); + } + else + { + // We will generate all of the code for the GT_PUTARG_STK and its child node + // as one contained operation + // + srcCount = BuildOperandUses(objChild); + } + } + else + { + // No source registers. 
+ putArgChild->OperIs(GT_LCL_VAR); + } + } + } + else + { + assert(!putArgChild->isContained()); + srcCount = BuildOperandUses(putArgChild); + } + buildInternalRegisterUses(); + return srcCount; +} + +#if FEATURE_ARG_SPLIT +//------------------------------------------------------------------------ +// BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// +// Arguments: +// argNode - a GT_PUTARG_SPLIT node +// +// Return Value: +// The number of sources consumed by this node. +// +// Notes: +// Set the child node(s) to be contained +// +int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) +{ + int srcCount = 0; + assert(argNode->gtOper == GT_PUTARG_SPLIT); + + GenTree* putArgChild = argNode->gtGetOp1(); + + // Registers for split argument corresponds to source + int dstCount = argNode->gtNumRegs; + + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + regMaskTP argMaskArr[MAX_REG_ARG] = {RBM_NONE}; + + for (unsigned i = 0; i < dstCount; i++) + { + argMaskArr[i] = genRegMask(argNode->GetRegNumByIdx(i)); + argMask |= argMaskArr[i]; + } + + if (putArgChild->OperGet() == GT_FIELD_LIST) + { + // Generated code: + // 1. Consume all of the items in the GT_FIELD_LIST (source) + // 2. Store to target slot and move to target registers (destination) from source + // + unsigned sourceRegCount = 0; + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. + + for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses()) + { + GenTree* node = use.GetNode(); + assert(!node->isContained()); + // The only multi-reg nodes we should see are OperIsMultiRegOp() + assert(!node->IsMultiRegNode()); + + // Consume all the registers, setting the appropriate register mask for the ones that + // go into registers. 
+ // (sourceRegCount < argNode->gtNumRegs) + BuildUse(node, argMaskArr[sourceRegCount], 0); + sourceRegCount++; + } + srcCount += sourceRegCount; + assert(putArgChild->isContained()); + } + else + { + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_OBJ); + + // We can use a ldr/str sequence so we need an internal register + buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); + + GenTree* objChild = putArgChild->gtGetOp1(); + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + assert(objChild->isContained()); + } + else + { + srcCount = BuildIndirUses(putArgChild->AsIndir()); + } + assert(putArgChild->isContained()); + } + buildInternalRegisterUses(); + assert((argMask != RBM_NONE) && ((int)genCountBits(argMask) == dstCount)); + for (int i = 0; i < dstCount; i++) + { + BuildDef(argNode, argMaskArr[i], i); + } + return srcCount; +} +#endif // FEATURE_ARG_SPLIT + +//------------------------------------------------------------------------ +// BuildBlockStore: Build the RefPositions for a block store node. +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = blkNode->Data(); + unsigned size = blkNode->Size(); + + GenTree* srcAddrOrFill = nullptr; + + regMaskTP dstAddrRegMask = RBM_NONE; + regMaskTP srcRegMask = RBM_NONE; + regMaskTP sizeRegMask = RBM_NONE; + + if (blkNode->OperIsInitBlkOp()) + { + if (src->OperIs(GT_INIT_VAL)) + { + assert(src->isContained()); + src = src->AsUnOp()->gtGetOp1(); + } + + srcAddrOrFill = src; + + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + break; + + case GenTreeBlk::BlkOpKindHelper: + assert(!src->isContained()); + dstAddrRegMask = RBM_ARG_0; + srcRegMask = RBM_ARG_1; + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + else + { + if (src->OperIs(GT_IND)) + { + assert(src->isContained()); + srcAddrOrFill = src->AsIndir()->Addr(); + } + + if (blkNode->OperIs(GT_STORE_OBJ)) + { + // We don't need to materialize the struct size but we still need + // a temporary register to perform the sequence of loads and stores. + // We can't use the special Write Barrier registers, so exclude them from the mask + regMaskTP internalIntCandidates = + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + + if (size >= 2 * REGSIZE_BYTES) + { + // We will use ldp/stp to reduce code size and improve performance + // so we need to reserve an extra internal register + buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); + } + + // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). 
+ if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + } + } + else + { + switch (blkNode->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindUnroll: + buildInternalIntRegisterDefForNode(blkNode); + break; + + case GenTreeBlk::BlkOpKindHelper: + dstAddrRegMask = RBM_ARG_0; + if (srcAddrOrFill != nullptr) + { + assert(!srcAddrOrFill->isContained()); + srcRegMask = RBM_ARG_1; + } + sizeRegMask = RBM_ARG_2; + break; + + default: + unreached(); + } + } + } + + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (sizeRegMask != RBM_NONE)) + { + // Reserve a temp register for the block size argument. + buildInternalIntRegisterDefForNode(blkNode, sizeRegMask); + } + + int useCount = 0; + + if (!dstAddr->isContained()) + { + useCount++; + BuildUse(dstAddr, dstAddrRegMask); + } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr->AsAddrMode()->Base()); + } + + if (srcAddrOrFill != nullptr) + { + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill->AsAddrMode()->Base()); + } + } + + if (blkNode->OperIs(GT_STORE_DYN_BLK)) + { + useCount++; + BuildUse(blkNode->AsDynBlk()->gtDynamicSize, sizeRegMask); + } + + buildInternalRegisterUses(); + regMaskTP killMask = getKillSetForBlockStore(blkNode); + BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + return useCount; +} + +//------------------------------------------------------------------------ +// BuildCast: Set the NodeInfo for a GT_CAST. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// The number of sources consumed by this node. 
+// +int LinearScan::BuildCast(GenTreeCast* cast) +{ + GenTree* src = cast->gtGetOp1(); + + const var_types srcType = genActualType(src->TypeGet()); + const var_types castType = cast->gtCastType; + + // Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to + // store the min and max immediate values that cannot be encoded in the CMP instruction. + if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT)) + { + buildInternalIntRegisterDefForNode(cast); + } + + int srcCount = BuildOperandUses(src); + buildInternalRegisterUses(); + BuildDef(cast); + return srcCount; +} + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f3aceac0dd14c..29d9e37fea8f7 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -179,7 +179,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType)) { if (srcType == TYP_FLOAT -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Arm64: src = float, dst is overflow conversion. // This goes through helper and hence src needs to be converted to double. && tree->gtOverflow() @@ -214,7 +214,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#ifdef TARGET_ARM64 // ARM64 supports all non-overflow checking conversions directly. 
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)// On ARM64 All non-overflow checking conversions can be optimized return nullptr; #else switch (dstType) @@ -903,6 +903,36 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned } #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) +fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, + GenTree* node, + GenTreeCall::Use* use, + regNumber regNum, + unsigned numRegs, + unsigned byteSize, + unsigned byteAlignment, + bool isStruct, + bool isFloatHfa, /* unused */ + bool isVararg, + const regNumber otherRegNum) +{ + fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); + assert(curArgTabEntry != nullptr); + + curArgTabEntry->isStruct = isStruct; // is this a struct arg + + INDEBUG(curArgTabEntry->checkIsStruct();) + assert(numRegs <= 2); + if (numRegs == 2) + { + curArgTabEntry->setRegNum(1, otherRegNum); + //curArgTabEntry->isSplit = true; + } + + return curArgTabEntry; +} +#endif // defined(TARGET_LOONGARCH64) + fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, GenTree* node, GenTreeCall::Use* use, @@ -1751,7 +1781,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) if (varTypeIsStruct(type)) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Can this type be passed as a primitive type? // If so, the following call will return the corresponding primitive type. @@ -1813,6 +1843,19 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // This will be passed by value in two registers. assert(addrNode != nullptr); + // Create an Obj of the temp to use it as a call argument. 
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); + } +#elif defined(TARGET_LOONGARCH64) + assert(varTypeIsStruct(type)); + if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) + { + // ToDo-LOONGARCH64: Consider using: arg->ChangeOper(GT_LCL_FLD); + // as that is how UNIX_AMD64_ABI works. + // We will create a GT_OBJ for the argument below. + // This will be passed by value in two registers. + assert(addrNode != nullptr); + // Create an Obj of the temp to use it as a call argument. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); } @@ -1823,7 +1866,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) #endif // FEATURE_MULTIREG_ARGS } -#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#else // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) // other targets, we pass the struct by value assert(varTypeIsStruct(type)); @@ -1834,7 +1877,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode); -#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM) +#endif // not (TARGET_AMD64 or TARGET_ARM64 or TARGET_ARM or TARGET_LOONGARCH64) } // (varTypeIsStruct(type)) @@ -1959,7 +2002,7 @@ void fgArgInfo::EvalArgsToTemps() if (setupArg->OperIsCopyBlkOp()) { setupArg = compiler->fgMorphCopyBlock(setupArg); -#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) if (lclVarType == TYP_STRUCT) { // This scalar LclVar widening step is only performed for ARM architectures. 
@@ -1970,7 +2013,7 @@ void fgArgInfo::EvalArgsToTemps() scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->IsVararg()); } -#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) +#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) } // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => @@ -2847,6 +2890,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = false; +#elif defined(TARGET_LOONGARCH64) + assert(!callIsVararg); + assert(!isHfaArg); + passUsingFloatRegs = !callIsVararg && varTypeIsFloating(argx); + #else #error Unsupported or unset target architecture #endif // TARGET* @@ -2963,6 +3011,27 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) size = genTypeStSz(argx->gtType); byteSize = genTypeSize(argx); } +#elif defined(TARGET_LOONGARCH64) + DWORD numFloatFields = 0; + if (!isStructArg) + { + size = 1; + byteSize = genTypeSize(argx); + } + else + { + // Structs are either passed in 1 or 2 (64-bit) slots. + // Structs that are the size of 2 pointers are passed by value in multiple registers, + // if sufficient registers are available. + // Structs that are larger than 2 pointers are passed by reference (to a copy). 
+ size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + + if (size > 2) + { + size = 1; + } + byteSize = structSize; + } #else #error Unsupported or unset target architecture #endif // TARGET_XXX @@ -2975,6 +3044,27 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) structPassingKind howToPassStruct; structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, structSize); passStructByRef = (howToPassStruct == SPK_ByReference); +#if defined(TARGET_LOONGARCH64) + if (!passStructByRef) + { + assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); + + numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + + passUsingFloatRegs = (numFloatFields & 0xf) ? true : false; + compFloatingPointUsed |= passUsingFloatRegs; + + if (numFloatFields & 7) + size = 1; + else if (numFloatFields & 8) + size = 2; + } + else //if (passStructByRef) + { + size = 1; + byteSize = TARGET_POINTER_SIZE; + } +#else if (howToPassStruct == SPK_ByReference) { byteSize = TARGET_POINTER_SIZE; @@ -3003,6 +3093,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { size = 1; } +#endif } const var_types argType = args->GetNode()->TypeGet(); @@ -3023,6 +3114,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) argAlignBytes = eeGetArgAlignment(argType, isFloatHfa); } +#ifdef TARGET_LOONGARCH64 + regNumber nextOtherRegNum = REG_STK; +#endif // // Figure out if the argument will be passed in a register. // @@ -3117,7 +3211,69 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } -#else // not TARGET_ARM or TARGET_ARM64 + +#elif defined(TARGET_LOONGARCH64) + if (passUsingFloatRegs) + { + // Check if the last register needed is still in the fp argument register range. 
+ passUsingFloatRegs = isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG; + + if (isStructArg) + { + if ((numFloatFields & 0x6) && passUsingFloatRegs) + passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; + + if (!passUsingFloatRegs) + { + size = structSize > 8 ? 2 : 1; + numFloatFields = 0; + } + else if (passUsingFloatRegs) + { + if (numFloatFields & 0x8) + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); + else if (numFloatFields & 0x4) + { + assert(size == 1); + size = 2; + passUsingFloatRegs = false; + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); + } + else if (/*(size == 1) && */(numFloatFields & 0x2)) + { + assert((size == 1) && (numFloatFields & 0x2)); + size = 2; + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); + } + } + } + + assert(!isHfaArg);//LOONGARCH not support HFA. + } + + // if run out the fp argument register, try the int argument register. + if (!isRegArg) + { + // Check if the last register needed is still in the int argument register range. + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; + if (!passUsingFloatRegs && isRegArg && (size > 1)) + nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1); + + // Did we run out of registers when we had a 16-byte struct (size===2) ? + // (i.e we only have one register remaining but we needed two registers to pass this arg) + // This prevents us from backfilling a subsequent arg into x7 + // + if (!isRegArg && (size > 1)) + { + // We also must update intArgRegNum so that we no longer try to + // allocate any new general purpose registers for args + // + isRegArg = intArgRegNum < maxRegArgs;//the split-struct case. 
+ nextOtherRegNum = REG_STK; + //assert((intArgRegNum + 1) == maxRegArgs); + } + } +#else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 #if defined(UNIX_AMD64_ABI) @@ -3269,7 +3425,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); @@ -3278,6 +3434,48 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Set up the next intArgRegNum and fltArgRegNum values. if (!isBackFilled) { +#if defined(TARGET_LOONGARCH64) + // Increment intArgRegNum by 'size' registers + if (!isNonStandard) + { + if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) + { +#if FEATURE_ARG_SPLIT + // This indicates a partial enregistration of a struct type + assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || + (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); + call->fgArgInfo->SplitArg(argIndex, 1, 1); +#endif // FEATURE_ARG_SPLIT + assert(!passUsingFloatRegs); + assert(size == 2); + //assert(nextOtherRegNum == REG_STK); + intArgRegNum = maxRegArgs; + } + else if ((numFloatFields & 0xf) == 0x0) + { + if (passUsingFloatRegs) + fltArgRegNum += 1; + else + intArgRegNum += size; + } + else if (numFloatFields & 0x1) + { + structBaseType = structSize == 8 ? 
TYP_DOUBLE : TYP_FLOAT; + fltArgRegNum += 1; + } + else if (numFloatFields & 0x6) + { + //assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); + fltArgRegNum += 1; + intArgRegNum += 1; + } + else if (numFloatFields & 0x8) + { + fltArgRegNum += 2; + } + } +#else + #if defined(UNIX_AMD64_ABI) if (isStructArg) { @@ -3326,6 +3524,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } } +#endif // defined(TARGET_LOONGARCH64) } } else // We have an argument that is not passed in a register @@ -3358,7 +3557,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (newArgEntry->isStruct) { newArgEntry->passedByRef = passStructByRef; +#if defined(TARGET_LOONGARCH64) + newArgEntry->argType = (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; +#else newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; +#endif } else { @@ -3571,6 +3774,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) assert(!"Structs are not passed by reference on x64/ux"); #endif // UNIX_AMD64_ABI } +#if defined(DEBUG) && defined(TARGET_LOONGARCH64) + else if ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && (size == 2)) + { + DEBUG_ARG_SLOTS_ASSERT(size == argEntry->numRegs); + } +#endif else // This is passed by value. { // Check to see if we can transform this into load of a primitive type. @@ -3596,7 +3805,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (!argEntry->IsHfaArg() || (passingSize == genTypeSize(argEntry->GetHfaType()))); } -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can // only transform in that case if the arg is a local. 
// TODO-CQ: This transformation should be applicable in general, not just for the ARM64 @@ -3606,7 +3815,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) canTransform = (lclVar != nullptr); passingSize = genTypeSize(structBaseType); } -#endif // TARGET_ARM64 || UNIX_AMD64_ABI +#endif // TARGET_ARM64 || UNIX_AMD64_ABI || TARGET_LOONGARCH64 } if (!canTransform) @@ -3649,6 +3858,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { copyBlkClass = objClass; } +#elif defined(TARGET_LOONGARCH64) + if ((passingSize != structSize) && (lclVar == nullptr)) + { + copyBlkClass = objClass; + } #endif #ifdef TARGET_ARM @@ -3767,7 +3981,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); } -#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) +#if !defined(UNIX_AMD64_ABI) && !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-CQ-XARCH: there is no need for a temp copy if we improve our code generation in // `genPutStructArgStk` for xarch like we did it for Arm/Arm64. @@ -3860,8 +4074,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #if FEATURE_MULTIREG_ARGS if (isStructArg) { +#if defined(TARGET_LOONGARCH64) + if ((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) +#else if (((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT)) +#endif { hasMultiregStructArgs = true; } @@ -4093,6 +4311,28 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) if ((size > 1) || (fgEntryPtr->IsHfaArg() && argx->TypeGet() == TYP_STRUCT)) { foundStructArg = true; +#if defined(TARGET_LOONGARCH64) + if (!argx->OperIs(GT_FIELD_LIST)) + { + GenTree* newArgx = fgMorphMultiregStructArg(argx, fgEntryPtr); + + // Did we replace 'argx' with a new tree? 
+ if (newArgx != argx) + { + // link the new arg node into either the late arg list or the gtCallArgs list + if (isLateArg) + { + lateUse->SetNode(newArgx); + } + else + { + use.SetNode(newArgx); + } + + assert(fgEntryPtr->GetNode() == newArgx); + } + } +#else if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) { if (fgEntryPtr->IsHfaRegArg()) @@ -4142,6 +4382,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) assert(fgEntryPtr->GetNode() == newArgx); } } +#endif } } @@ -4180,9 +4421,11 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) { +#if !defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(arg->TypeGet())); +#endif -#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) +#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif @@ -4233,7 +4476,36 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #if FEATURE_MULTIREG_ARGS // Examine 'arg' and setup argValue objClass and structSize // +#if defined(TARGET_LOONGARCH64) + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); + if (objClass == NO_CLASS_HANDLE) + { + assert(arg->TypeGet() != TYP_STRUCT); + assert(arg->OperGet() == GT_LCL_FLD); + assert(fgEntryPtr->numRegs == 2); + + GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); + unsigned varNum = varNode->GetLclNum(); + assert(varNum < lvaCount); + LclVarDsc* varDsc = &lvaTable[varNum]; + assert(varDsc->lvExactSize == 8); + + unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); + GenTreeFieldList* newArg = nullptr; + var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? 
TYP_FLOAT : TYP_INT; + arg->gtType = tmp_type; + + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + newArg->AddField(this, arg, offset, tmp_type); + tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? TYP_FLOAT : TYP_INT; + GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type, offset + 4); + newArg->AddField(this, nextLclFld, offset + 4, tmp_type); + + return newArg; + } +#else const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); +#endif GenTree* argValue = arg; // normally argValue will be arg, but see right below unsigned structSize = 0; @@ -4351,11 +4623,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry case 2: type[lastElem] = TYP_SHORT; break; -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) case 4: type[lastElem] = TYP_INT; break; -#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) +#endif // (TARGET_ARM64) || (UNIX_AMD64_ABI) || (TARGET_LOONGARCH64) default: noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); break; @@ -4394,6 +4666,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #endif // DEBUG #ifndef UNIX_AMD64_ABI +#if !defined(TARGET_LOONGARCH64) // This local variable must match the layout of the 'objClass' type exactly if (varDsc->lvIsHfa() #if !defined(HOST_UNIX) && defined(TARGET_ARM64) @@ -4413,6 +4686,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } } else +#endif { #if defined(TARGET_ARM64) // We must have a 16-byte struct (non-HFA) @@ -4555,7 +4829,16 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); } -#endif // TARGET_ARM +#elif defined(TARGET_LOONGARCH64) + // Is this LclVar a promoted struct with exactly same size? 
+ assert(!varDsc->lvPromoted); + + assert(structSize >= TARGET_POINTER_SIZE); + { + // We will create a list of GT_LCL_FLDs nodes to pass this struct + lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); + } +#endif // TARGET_LOONGARCH64 } // If we didn't set newarg to a new List Node tree @@ -4621,12 +4904,66 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); +#if defined(TARGET_LOONGARCH64) + DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + if ((numFloatFields & 0xe) /*&& (varDsc->lvSize() == TARGET_POINTER_SIZE)*/) + { + assert((numFloatFields & 0xf) > 1); + var_types tmp_type_1; + var_types tmp_type_2; + + compFloatingPointUsed = true; + if (numFloatFields & 0x8) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + } + else if (numFloatFields & 0x2) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] + tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; + } + else if (numFloatFields & 0x4) + { + //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + } + else + { + assert(!"----------------unimplemented type-case... on LOONGARCH"); + unreached(); + } + elemSize = numFloatFields & 0x30 ? 
8 : 4;; + + GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); + newArg->AddField(this, nextLclFld, offset, tmp_type_1); + offset += elemSize; + nextLclFld = gtNewLclFldNode(varNum, tmp_type_2, offset); + newArg->AddField(this, nextLclFld, offset, tmp_type_2); + } + else + { + GenTree* nextLclFld = gtNewLclFldNode(varNum, type[0], offset); + newArg->AddField(this, nextLclFld, offset, type[0]); + + if (elemCount > 1) + { + assert(elemCount == 2); + elemSize = genTypeSize(type[1]); + nextLclFld = gtNewLclFldNode(varNum, type[1], offset + elemSize); + newArg->AddField(this, nextLclFld, offset + elemSize, type[1]); + } + } +#else for (unsigned inx = 0; inx < elemCount; inx++) { GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); newArg->AddField(this, nextLclFld, offset, type[inx]); offset += genTypeSize(type[inx]); } +#endif } // Are we passing a GT_OBJ struct? // @@ -4656,6 +4993,59 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; +#if defined(TARGET_LOONGARCH64) + DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + if (numFloatFields & 0xe) + { + assert((numFloatFields & 0xf) > 1); + var_types tmp_type_1; + var_types tmp_type_2; + + compFloatingPointUsed = true; + if (numFloatFields & 0x8) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + } + else if (numFloatFields & 0x2) + { + tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; + tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; + } + else if (numFloatFields & 0x4) + { + //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & 0x20 ? 
TYP_DOUBLE : TYP_FLOAT; + } + else + { + assert(!"----------------unimplemented type-case... on LOONGARCH"); + unreached(); + } + elemSize = numFloatFields & 0x30 ? 8 : 4;; + + GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); + // For safety all GT_IND should have at least GT_GLOB_REF set. + curItem->gtFlags |= GTF_GLOB_REF; + + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); + newArg->AddField(this, curItem, 0, tmp_type_1); + + //GenTree* curAddr = baseAddr; + baseAddr = gtCloneExpr(baseAddr); + noway_assert(baseAddr != nullptr); + baseAddr = gtNewOperNode(GT_ADD, addrType, baseAddr, gtNewIconNode(elemSize, TYP_I_IMPL)); + + curItem = gtNewIndir(tmp_type_2, baseAddr); + // For safety all GT_IND should have at least GT_GLOB_REF set. + curItem->gtFlags |= GTF_GLOB_REF; + + newArg->AddField(this, curItem, elemSize, tmp_type_2); + } + else +#endif for (unsigned inx = 0; inx < elemCount; inx++) { GenTree* curAddr = baseAddr; @@ -5060,6 +5450,12 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); new_op1->gtType = TYP_I_IMPL; } +#ifdef TARGET_LOONGARCH64 + else if ((op1->TypeGet() == TYP_LONG) && (ad2->TypeGet() == TYP_INT)) + { + new_op1->gtType = TYP_LONG; + } +#endif // If new_op1 is a new expression. Assign it a new unique value number. // vnStore is null before the ValueNumber phase has run @@ -5352,6 +5748,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) noway_assert(index2 != nullptr); } +#ifndef TARGET_LOONGARCH64 // Next introduce a GT_ARR_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. 
@@ -5371,6 +5768,9 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) { arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } +#else + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); +#endif GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); @@ -5389,6 +5789,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) GenTree* addr; #ifdef TARGET_64BIT +#ifndef TARGET_LOONGARCH64 // Widen 'index' on 64-bit targets if (index->TypeGet() != TYP_I_IMPL) { @@ -5401,6 +5802,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) index = gtNewCastNode(TYP_I_IMPL, index, true, TYP_I_IMPL); } } +#endif #endif // TARGET_64BIT /* Scale the index value if necessary */ @@ -12226,6 +12628,22 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) break; #endif +#ifdef TARGET_LOONGARCH64 + case GT_DIV: + case GT_MOD: + if (!varTypeIsFloating(tree->gtType)) + { + // Codegen for this instruction needs to be able to throw two exceptions: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); + } + break; + case GT_UDIV: + case GT_UMOD: + // Codegen for this instruction needs to be able to throw one exception: + fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); + break; +#endif case GT_ADD: CM_OVF_OP: @@ -13948,6 +14366,12 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) // be in a fully-interruptible code region. 
if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) { +#ifdef TARGET_LOONGARCH64 + if ((op2->TypeGet() == TYP_LONG) /*&& (op1->TypeGet() == TYP_INT)*/) + { + op1->gtType = TYP_LONG; + } +#endif tree->gtOp2 = ad2; op1->AsOp()->gtOp2 = op2; @@ -17139,7 +17563,7 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) void Compiler::fgResetImplicitByRefRefCount() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17162,7 +17586,7 @@ void Compiler::fgResetImplicitByRefRefCount() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17176,7 +17600,7 @@ void Compiler::fgResetImplicitByRefRefCount() void Compiler::fgRetypeImplicitByRefArgs() { -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #ifdef DEBUG if (verbose) { @@ -17375,7 +17799,7 @@ void Compiler::fgRetypeImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } //------------------------------------------------------------------------ @@ -17388,7 +17812,7 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() { JITDUMP("\n*************** In fgMarkDemotedImplicitByRefArgs()\n"); -#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) +#if (defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { @@ -17449,7 +17873,7 @@ void 
Compiler::fgMarkDemotedImplicitByRefArgs() } } -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } /***************************************************************************** @@ -17459,11 +17883,11 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() */ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) { -#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) +#if (!defined(TARGET_AMD64) || defined(UNIX_AMD64_ABI)) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) return false; -#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#else // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 bool changed = false; @@ -17498,7 +17922,7 @@ bool Compiler::fgMorphImplicitByRefArgs(GenTree* tree) } return changed; -#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 +#endif // (TARGET_AMD64 && !UNIX_AMD64_ABI) || TARGET_ARM64 || TARGET_LOONGARCH64 } GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) @@ -17647,7 +18071,11 @@ GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) { // We expect 'addr' to be an address at this point. +#ifdef TARGET_LOONGARCH64 + assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || addr->TypeGet() == TYP_REF); +#else assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); +#endif // Tunnel through any commas. 
const bool commaOnly = true; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 5fc26491fc616..1dff1eba7a6d8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5114,7 +5114,9 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu unsigned kind; noway_assert(tree); +#ifndef TARGET_LOONGARCH64 noway_assert(genActualType(tree->gtType) == genActualType(srct)); +#endif /* Assume we're only handling integer types */ noway_assert(varTypeIsIntegral(srct)); @@ -5282,8 +5284,13 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu switch (tree->gtOper) { case GT_AND: +#ifdef TARGET_LOONGARCH64 + noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op1->gtType))); + noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op2->gtType))); +#else noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); +#endif GenTree* opToNarrow; opToNarrow = nullptr; diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 939ea56badf2c..5720c4a23e3eb 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -162,6 +162,18 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc #if FEATURE_MULTIREG_ARGS if (varTypeIsStruct(argDsc->lvType)) { +#ifdef TARGET_LOONGARCH64 + { + if (argDsc->GetOtherArgReg() != REG_NA) + { + inArgMask = genRegMask(argDsc->GetOtherArgReg()); + if (emitter::isFloatReg(argDsc->GetOtherArgReg())) + codeGen->floatRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; + else + codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; + } + } +#else if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); @@ -186,6 +198,7 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc regState->rsCalleeRegArgMaskLiveIn |= 
genRegMask(nextArgReg); } } +#endif } #endif // FEATURE_MULTIREG_ARGS @@ -256,6 +269,16 @@ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason)) } #endif // TARGET_ARM64 +#ifdef TARGET_LOONGARCH64 + // TODO-LOONGARCH64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog + // can handle non-frame pointer frames. + if (!result) + { + INDEBUG(reason = "Temporary LOONGARCH64 force frame pointer"); + result = true; + } +#endif // TARGET_LOONGARCH64 + #ifdef DEBUG if ((result == true) && (wbReason != nullptr)) { diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index d06bef0cea1d9..971974722eee8 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -103,6 +103,9 @@ REGDEF(STK, 16+XMMBASE, 0x0000, "STK" ) #elif defined(TARGET_ARM64) #include "registerarm64.h" +#elif defined(TARGET_LOONGARCH64) + #include "registerloongarch64.h" + #else #error Unsupported or unset target architecture #endif // target type diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index a90e61c3a59fd..16c3f051f74bf 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -42,6 +42,17 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) // For System V the reg type counters should be independent. nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); +#elif defined(TARGET_LOONGARCH64) + // LA-ABI64. + if (numRegs > MAX_PASS_MULTIREG_BYTES/TARGET_POINTER_SIZE) + { + assert(varTypeIsStruct(type)); + nextReg(TYP_INT, 1);//TYP_BYREF + }//TODO:struct-float. + else + { + nextReg(type, numRegs); + } #else // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. 
nextReg(type, numRegs); diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h new file mode 100644 index 0000000000000..4127ce8ca4ace --- /dev/null +++ b/src/coreclr/jit/registerloongarch64.h @@ -0,0 +1,108 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// clang-format off + +/*****************************************************************************/ +/*****************************************************************************/ +#ifndef REGDEF +#error Must define REGDEF macro before including this file +#endif +#ifndef REGALIAS +#define REGALIAS(alias, realname) +#endif + +#define RMASK(x) (1ULL << (x)) + +/* +REGDEF(name, rnum, mask, xname, wname) */ +REGDEF(R0, 0, 0x0001, "zero" , "zero" ) +REGDEF(RA, 1, 0x0002, "ra" , "ra" ) +REGDEF(TP, 2, 0x0004, "tp" , "tp" ) +REGDEF(SP, 3, 0x0008, "sp" , "sp" ) +REGDEF(A0, 4, 0x0010, "a0" , "a0" ) +REGDEF(A1, 5, 0x0020, "a1" , "a1" ) +REGDEF(A2, 6, 0x0040, "a2" , "a2" ) +REGDEF(A3, 7, 0x0080, "a3" , "a3" ) +REGDEF(A4, 8, 0x0100, "a4" , "a4" ) +REGDEF(A5, 9, 0x0200, "a5" , "a5" ) +REGDEF(A6, 10, 0x0400, "a6" , "a6" ) +REGDEF(A7, 11, 0x0800, "a7" , "a7" ) +REGDEF(T0, 12, 0x1000, "t0" , "t0" ) +REGDEF(T1, 13, 0x2000, "t1" , "t1" ) +REGDEF(T2, 14, 0x4000, "t2" , "t2" ) +REGDEF(T3, 15, 0x8000, "t3" , "t3" ) +REGDEF(T4, 16, 0x10000, "t4" , "t4" ) +REGDEF(T5, 17, 0x20000, "t5" , "t5" ) +REGDEF(T6, 18, 0x40000, "t6" , "t6" ) +REGDEF(T7, 19, 0x80000, "t7" , "t7" ) +REGDEF(T8, 20, 0x100000, "t8" , "t8" ) +REGDEF(X0, 21, 0x200000, "x0" , "x0" ) +REGDEF(FP, 22, 0x400000, "fp" , "fp" ) +REGDEF(S0, 23, 0x800000, "s0" , "s0" ) +REGDEF(S1, 24, 0x1000000, "s1" , "s1" ) +REGDEF(S2, 25, 0x2000000, "s2" , "s2" ) +REGDEF(S3, 26, 0x4000000, "s3" , "s3" ) +REGDEF(S4, 27, 0x8000000, "s4" , "s4" ) +REGDEF(S5, 28, 0x10000000, "s5" , "s5" ) +REGDEF(S6, 29, 0x20000000, "s6" , "s6" ) +REGDEF(S7, 30, 0x40000000, "s7" 
, "s7" ) +REGDEF(S8, 31, 0x80000000, "s8" , "s8" ) + + +REGALIAS(R21, X0) + +#define FBASE 32 +#define FMASK(x) (1ULL << (FBASE+(x))) + +/* +REGDEF(name, rnum, mask, xname, wname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0", "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1", "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2", "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3", "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4", "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5", "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6", "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7", "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8", "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9", "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10", "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11", "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12", "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13", "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14", "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15", "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16", "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17", "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18", "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19", "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20", "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21", "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22", "f22") +REGDEF(F23, 23+FBASE, FMASK(23), "f23", "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24", "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25", "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26", "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27", "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28", "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29", "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30", "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31", "f31") + +// The registers with values 64 (NBASE) and above are not real register numbers +#define NBASE 64 + +// This must be last! 
+REGDEF(STK, 0+NBASE, 0x0000, "STK", "STK") + +/*****************************************************************************/ +#undef RMASK +#undef FMASK +#undef FBASE +#undef NBASE +#undef REGDEF +#undef REGALIAS +/*****************************************************************************/ + +// clang-format on diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 58439020fd693..aade930da4fd5 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -23,7 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) const regMaskSmall regMasks[] = { #define REGDEF(name, rnum, mask, xname, wname) mask, #include "register.h" @@ -230,6 +230,8 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r #ifdef TARGET_ARMARCH rsMaskCalleeSaved = RBM_NONE; +#elif defined(TARGET_LOONGARCH64) + rsMaskCalleeSaved = RBM_NONE; #endif // TARGET_ARMARCH #ifdef TARGET_ARM diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index 34a9bcea64629..a200b5686a7df 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -125,6 +125,8 @@ class RegSet #ifdef TARGET_ARMARCH regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog +#elif defined(TARGET_LOONGARCH64) + regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog #endif // TARGET_ARM public: // TODO-Cleanup: Should be private, but Compiler uses it diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 69a63e020b46c..03ddbdfd7585c 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -295,7 +295,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_LONG: case TYP_DOUBLE: #endif // TARGET_64BIT -#if defined(TARGET_AMD64) || 
defined(TARGET_ARM64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // In the AMD64 ABI we are supposed to pass a struct by reference when its // size is not 1, 2, 4 or 8 bytes in size. During fgMorph, the compiler modifies // the IR to comply with the ABI and therefore changes the type of the lclVar @@ -314,7 +314,7 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( this->vlType = VLT_STK_BYREF; } else -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) { this->vlType = VLT_STK; } @@ -1600,11 +1600,34 @@ void CodeGen::psiBegProlog() if (!isStructHandled) { #ifdef DEBUG +#ifdef TARGET_LOONGARCH64 + var_types regType; + if (varTypeIsStruct(lclVarDsc)) + { + // Must be <= 16 bytes or else it wouldn't be passed in registers, + // which can be bigger (and is handled above). + noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); + if (emitter::isFloatReg(lclVarDsc->GetArgReg())) + { + //regType = lclVarDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; + regType = TYP_DOUBLE; + } + else + regType = lclVarDsc->GetLayout()->GetGCPtrType(0); + } + else + { + regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); + if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) + regType = TYP_LONG; + } +#else var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); if (lclVarDsc->lvIsHfaRegArg()) { regType = lclVarDsc->GetHfaType(); } +#endif assert(genMapRegNumToRegArgNum(lclVarDsc->GetArgReg(), regType) != (unsigned)-1); #endif // DEBUG diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b6ab3166e10f8..e78b74616ce41 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -34,7 +34,7 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() { @@ -51,6 +51,8 @@ inline bool compUnixX86Abi() #define TARGET_READABLE_NAME "ARM" #elif defined(TARGET_ARM64) #define TARGET_READABLE_NAME "ARM64" +#elif defined(TARGET_LOONGARCH64) +#define TARGET_READABLE_NAME "LOONGARCH64" #else #error Unsupported or unset target architecture #endif @@ -70,6 +72,10 @@ inline bool compUnixX86Abi() #define REGMASK_BITS 64 #define CSE_CONST_SHARED_LOW_BITS 12 +#elif defined(TARGET_LOONGARCH64) +#define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 + #else #error Unsupported or unset target architecture #endif @@ -170,6 +176,27 @@ enum _regMask_enum : unsigned #include "register.h" }; +#elif defined(TARGET_LOONGARCH64) + +enum _regNumber_enum : unsigned +{ +#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum, +#define REGALIAS(alias, realname) REG_##alias = REG_##realname, +#include "register.h" + + REG_COUNT, + REG_NA = REG_COUNT, + ACTUAL_REG_COUNT = 
REG_COUNT - 1 // everything but REG_STK (only real regs) +}; + +enum _regMask_enum : unsigned __int64 +{ + RBM_NONE = 0, +#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, +#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#include "register.h" +}; + #else #error Unsupported target architecture #endif @@ -185,7 +212,7 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) typedef unsigned __int64 regMaskTP; #else typedef unsigned regMaskTP; @@ -237,6 +264,8 @@ typedef unsigned char regNumberSmall; #include "targetarm.h" #elif defined(TARGET_ARM64) #include "targetarm64.h" +#elif defined(TARGET_LOONGARCH64) +#include "targetloongarch64.h" #else #error Unsupported or unset target architecture #endif @@ -536,7 +565,7 @@ inline regMaskTP genRegMask(regNumber reg) inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */) { -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) assert(genIsValidFloatReg(reg)); assert((unsigned)reg < ArrLen(regMasks)); return regMasks[reg]; @@ -672,7 +701,7 @@ inline bool isFloatRegType(var_types type) C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE); -#if ETW_EBP_FRAMED +#if ETW_EBP_FRAMED && !defined(TARGET_LOONGARCH64) // Frame pointer isn't either if we're supporting ETW frame chaining C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp new file mode 100644 index 0000000000000..92f076eba3388 --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -0,0 +1,30 @@ +// Licensed to the .NET 
Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Copyright (c) Loongson Technology. All rights reserved. + +/*****************************************************************************/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#include "target.h" + +const char* Target::g_tgtCPUName = "loongarch64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; + +// clang-format off +const regNumber intArgRegs [] = {REG_A0, REG_A1, REG_A2, REG_A3, REG_A4, REG_A5, REG_A6, REG_A7}; +const regMaskTP intArgMasks[] = {RBM_A0, RBM_A1, RBM_A2, RBM_A3, RBM_A4, RBM_A5, RBM_A6, RBM_A7}; + +const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7 }; +const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7 }; +// clang-format on + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h new file mode 100644 index 0000000000000..cf97f4148cf16 --- /dev/null +++ b/src/coreclr/jit/targetloongarch64.h @@ -0,0 +1,336 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +#pragma once + +#if !defined(TARGET_LOONGARCH64) +#error The file should not be included for this platform. +#endif + +// clang-format off + #define CPU_LOAD_STORE_ARCH 1 + //#define CPU_LONG_USES_REGPAIR 0 + #define CPU_HAS_FP_SUPPORT 1 + #define ROUND_FLOAT 0 // Do not round intermed float expression results + #define CPU_HAS_BYTE_REGS 0 + //#define CPU_USES_BLOCK_MOVE 0 + + #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk. 
+ #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll InitBlk. + +#ifdef FEATURE_SIMD +#pragma error("SIMD Unimplemented yet LOONGARCH") + #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned + #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls +#endif // FEATURE_SIMD + + #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog + #define FEATURE_STRUCTPROMOTE 0 // JIT Optimization to promote fields of structs into registers + #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers + #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp + #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. + #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register + #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register + #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (8-byte). + #define MAX_PASS_MULTIREG_BYTES 16 // Maximum size of a struct that could be passed in more than one register + #define MAX_RET_MULTIREG_BYTES 16 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 2 doubles) + #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers. + #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value. 
+ #define MAX_MULTIREG_COUNT 2 // Maxiumum number of registers defined by a single instruction (including calls). + // This is also the maximum number of registers for a MultiReg node. + + + #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers + #define USER_ARGS_COME_LAST 1 + #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really + // need to track stack depth, but this is currently necessary to get GC information reported at call sites. + #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target + #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses. + #define FEATURE_EH_FUNCLETS 1 + #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses. + #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods + #define CSE_CONSTS 1 // Enable if we want to CSE constants + + #define REG_FP_FIRST REG_F0 + #define REG_FP_LAST REG_F31 + #define FIRST_FP_ARGREG REG_F0 + #define LAST_FP_ARGREG REG_F7 + + #define REGNUM_BITS 6 // number of bits in a REG_* within registerloongarch64.h + #define REGSIZE_BYTES 8 // number of bytes in one general purpose register + #define FP_REGSIZE_BYTES 8 // number of bytes in one FP register + #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP register that are saved/restored. + + #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call. 
+ + #define CODE_ALIGN 4 // code alignment requirement + #define STACK_ALIGN 16 // stack alignment requirement + + #define RBM_INT_CALLEE_SAVED (RBM_S0|RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8) + #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_T7|RBM_T8) + #define RBM_FLT_CALLEE_SAVED (RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31) + //#define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F12|RBM_F13|RBM_F14|RBM_F15|RBM_F16|RBM_F17|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23) + #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7) + + #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) + #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH) + + #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 + #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 + + #define REG_FASTTAILCALL_TARGET REG_T4 // Target register for fast tail call + #define RBM_FASTTAILCALL_TARGET RBM_T4 + + #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) + #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) + #define RBM_ALLDOUBLE RBM_ALLFLOAT + + // REG_VAR_ORDER is: (CALLEE_TRASH & ~CALLEE_TRASH_NOGC), CALLEE_TRASH_NOGC, CALLEE_SAVED + #define REG_VAR_ORDER REG_A0,REG_A1,REG_A2,REG_A3,REG_A4,REG_A5,REG_A6,REG_A7, \ + REG_T0,REG_T1,REG_T2,REG_T3,REG_T4,REG_T5,REG_T6,REG_T7,REG_T8, \ + REG_CALLEE_SAVED_ORDER + + #define REG_VAR_ORDER_FLT REG_F12,REG_F13,REG_F14,REG_F15,REG_F16,REG_F17,REG_F18,REG_F19, \ + REG_F2,REG_F3,REG_F4,REG_F5,REG_F6,REG_F7,REG_F8,REG_F9,REG_F10, \ + REG_F20,REG_F21,REG_F22,REG_F23, \ + REG_F24,REG_F25,REG_F26,REG_F27,REG_F28,REG_F29,REG_F30,REG_F31, \ + REG_F1,REG_F0 + + #define REG_CALLEE_SAVED_ORDER REG_S0,REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8 + #define RBM_CALLEE_SAVED_ORDER 
RBM_S0,RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8 + + #define CNT_CALLEE_SAVED (9) //s0-s8, not including fp,ra. + #define CNT_CALLEE_TRASH (17) + #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) + + #define CNT_CALLEE_SAVED_FLOAT (8) + #define CNT_CALLEE_TRASH_FLOAT (24) + + #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES) + #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES) + + #define REG_TMP_0 REG_T0 + + // Temporary registers used for the GS cookie check. + #define REG_GSCOOKIE_TMP_0 REG_T0 + #define REG_GSCOOKIE_TMP_1 REG_T1 + + // register to hold shift amount; no special register is required on LOONGARCH64. + #define REG_SHIFT REG_NA + #define RBM_SHIFT RBM_ALLINT + //#define PREDICT_REG_SHIFT PREDICT_REG + + // This is a general scratch register that does not conflict with the argument registers + #define REG_SCRATCH REG_T0 + + // This is a float scratch register that does not conflict with the argument registers + #define REG_SCRATCH_FLT REG_F11 + + // This is a general register that can be optionally reserved for other purposes during codegen + #define REG_OPT_RSVD REG_T1 + #define RBM_OPT_RSVD RBM_T1 + + // Where is the exception object on entry to the handler block? 
+ #define REG_EXCEPTION_OBJECT REG_A0 + #define RBM_EXCEPTION_OBJECT RBM_A0 + + #define REG_JUMP_THUNK_PARAM REG_T2 + #define RBM_JUMP_THUNK_PARAM RBM_T2 + + // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S): + // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): + // On entry: + // v0: the destination address (LHS of the assignment) + // v1: the object reference (RHS of the assignment) + // On exit: + // t0: trashed + // t1: trashed + // t2: trashed + // t3: trashed + // v0: incremented by 8 + // v1: trashed + // ??: trashed if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP (currently non-Windows) + // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // On entry: + // t8: the source address (points to object reference to write) + // v0: the destination address (object reference written here) + // On exit: + // t8: incremented by 8 + // v0: incremented by 8 + // + // Note that while ?reg? is currently only trashed under FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP, + // currently only set for non-Windows//, it is expected to be set in the future for Windows, and for R2R. + // So simply always consider it trashed, to avoid later breaking changes. + + #define REG_WRITE_BARRIER_DST REG_T6 + #define RBM_WRITE_BARRIER_DST RBM_T6 + + #define REG_WRITE_BARRIER_SRC REG_T7 + #define RBM_WRITE_BARRIER_SRC RBM_T7 + + #define REG_WRITE_BARRIER_DST_BYREF REG_T6 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_T6 + + #define REG_WRITE_BARRIER_SRC_BYREF REG_T8 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T8 + + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. 
+ #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that a0 and a1 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC + + // GenericPInvokeCalliHelper VASigCookie Parameter + #define REG_PINVOKE_COOKIE_PARAM REG_T3 + #define RBM_PINVOKE_COOKIE_PARAM RBM_T3 + + // GenericPInvokeCalliHelper unmanaged target Parameter + #define REG_PINVOKE_TARGET_PARAM REG_T2 + #define RBM_PINVOKE_TARGET_PARAM RBM_T2 + + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) + #define REG_SECRET_STUB_PARAM REG_T2 + #define RBM_SECRET_STUB_PARAM RBM_T2 + + // R2R indirect call. Use the same registers as VSD + #define REG_R2R_INDIRECT_PARAM REG_T8 + #define RBM_R2R_INDIRECT_PARAM RBM_T8 + + #define REG_INDIRECT_CALL_TARGET_REG REG_T6 + + // Registers used by PInvoke frame setup //should confirm. 
+ #define REG_PINVOKE_FRAME REG_T0 + #define RBM_PINVOKE_FRAME RBM_T0 + #define REG_PINVOKE_TCB REG_T1 + #define RBM_PINVOKE_TCB RBM_T1 + #define REG_PINVOKE_SCRATCH REG_T1 + #define RBM_PINVOKE_SCRATCH RBM_T1 + + // The following defines are useful for iterating a regNumber + #define REG_FIRST REG_R0 + #define REG_INT_FIRST REG_R0 + #define REG_INT_LAST REG_S8 + #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1) + #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1)) + #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1)) + + // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks + #define REG_PROFILER_ENTER_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_ENTER_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_ENTER_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_ENTER_ARG_CALLER_SP RBM_R11 + #define REG_PROFILER_LEAVE_ARG_FUNC_ID REG_R10 + #define RBM_PROFILER_LEAVE_ARG_FUNC_ID RBM_R10 + #define REG_PROFILER_LEAVE_ARG_CALLER_SP REG_R11 + #define RBM_PROFILER_LEAVE_ARG_CALLER_SP RBM_R11 + + // The registers trashed by profiler enter/leave/tailcall hook + #define RBM_PROFILER_ENTER_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_ARG_REGS|RBM_FLTARG_REGS|RBM_FP)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH + + // Which register are int and long values returned in ? 
+ #define REG_INTRET REG_A0 + #define RBM_INTRET RBM_A0 + #define REG_LNGRET REG_A0 + #define RBM_LNGRET RBM_A0 + // second return register for 16-byte structs + #define REG_INTRET_1 REG_A1 + #define RBM_INTRET_1 RBM_A1 + + #define REG_FLOATRET REG_F0 + #define RBM_FLOATRET RBM_F0 + #define RBM_DOUBLERET RBM_F0 + #define REG_FLOATRET_1 REG_F1 + #define RBM_FLOATRET_1 RBM_F1 + #define RBM_DOUBLERET_1 RBM_F1 + + // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper + #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH + + // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. + #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + + #define REG_FPBASE REG_FP + #define RBM_FPBASE RBM_FP + #define STR_FPBASE "fp" + #define REG_SPBASE REG_SP + #define RBM_SPBASE RBM_SP // reuse the RBM for REG_SP + #define STR_SPBASE "sp" + + #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address + + #define MAX_REG_ARG 8 + #define MAX_FLOAT_REG_ARG 8 + + #define REG_ARG_FIRST REG_A0 + #define REG_ARG_LAST REG_A7 + #define REG_ARG_FP_FIRST REG_F0 + #define REG_ARG_FP_LAST REG_F7 + #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots + + #define REG_ARG_0 REG_A0 + #define REG_ARG_1 REG_A1 + #define REG_ARG_2 REG_A2 + #define REG_ARG_3 REG_A3 + #define REG_ARG_4 REG_A4 + #define REG_ARG_5 REG_A5 + #define REG_ARG_6 REG_A6 + #define REG_ARG_7 REG_A7 + + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + + #define RBM_ARG_0 RBM_A0 + #define RBM_ARG_1 RBM_A1 + #define RBM_ARG_2 RBM_A2 + #define RBM_ARG_3 RBM_A3 + #define RBM_ARG_4 RBM_A4 + #define RBM_ARG_5 RBM_A5 + #define RBM_ARG_6 RBM_A6 + #define RBM_ARG_7 RBM_A7 + + #define REG_FLTARG_0 REG_F0 + #define REG_FLTARG_1 REG_F1 + #define REG_FLTARG_2 REG_F2 + #define REG_FLTARG_3 REG_F3 + #define REG_FLTARG_4 REG_F4 + #define REG_FLTARG_5 REG_F5 + #define REG_FLTARG_6 REG_F6 + #define REG_FLTARG_7 REG_F7 + + #define 
RBM_FLTARG_0 RBM_F0 + #define RBM_FLTARG_1 RBM_F1 + #define RBM_FLTARG_2 RBM_F2 + #define RBM_FLTARG_3 RBM_F3 + #define RBM_FLTARG_4 RBM_F4 + #define RBM_FLTARG_5 RBM_F5 + #define RBM_FLTARG_6 RBM_F6 + #define RBM_FLTARG_7 RBM_F7 + + #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7) + #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7) + + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; + + #define B_DIST_SMALL_MAX_NEG (-131072) + #define B_DIST_SMALL_MAX_POS (+131071) + + #define OFFSET_DIST_SMALL_MAX_NEG (-2048) + #define OFFSET_DIST_SMALL_MAX_POS (+2047) + + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0 + +// clang-format on diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 8d5efd0051906..ffb7cf71d886b 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -412,7 +412,7 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) else { if (TargetArchitecture::IsX64 || - (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86))) + (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) { assert(func->startLoc != nullptr); offset = func->startLoc->GetFuncletPrologOffset(GetEmitter()); @@ -442,6 +442,10 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) // See unwindX86.cpp +#elif defined(TARGET_LOONGARCH64) + +// See unwindLoongarch64.cpp + #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index c578c30cb78d0..bb93348cc2fdd 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -#ifdef TARGET_ARMARCH +////TODO for LOONGARCH64: should seperately define for loongarch64. +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Windows no longer imposes a maximum prolog size. However, we still have an // assert here just to inform us if we increase the size of the prolog @@ -34,7 +35,15 @@ const unsigned MAX_EPILOG_SIZE_BYTES = 100; #define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) #define UW_MAX_CODE_WORDS_COUNT 31 #define UW_MAX_EPILOG_START_INDEX 0x3FFU -#endif // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) +const unsigned MAX_PROLOG_SIZE_BYTES = 200; +const unsigned MAX_EPILOG_SIZE_BYTES = 200; +#define UWC_END 0xE4 // "end" unwind code +#define UWC_END_C 0xE5 // "end_c" unwind code +#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20) +#define UW_MAX_CODE_WORDS_COUNT 31 +#define UW_MAX_EPILOG_START_INDEX 0x3FFU +#endif // TARGET_LOONGARCH64 #define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field // of the .pdata record @@ -131,6 +140,8 @@ class UnwindCodesBase return b >= 0xFD; #elif defined(TARGET_ARM64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? +#elif defined(TARGET_LOONGARCH64) + return (b == UWC_END); #endif // TARGET_ARM64 } @@ -813,7 +824,7 @@ class UnwindInfo : public UnwindBase // Given the first byte of the unwind code, check that its opsize matches // the last instruction added in the emitter. void CheckOpsize(BYTE b1); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void CheckOpsize(BYTE b1) { } // nothing to do; all instructions are 4 bytes diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp new file mode 100644 index 0000000000000..eae92c102e381 --- /dev/null +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -0,0 +1,2347 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +// Copyright (c) Loongson Technology. All rights reserved. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX UnwindInfo XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#if defined(TARGET_LOONGARCH64) + +#if defined(FEATURE_CFI_SUPPORT) +short Compiler::mapRegNumToDwarfReg(regNumber reg) +{ + short dwarfReg = DWARF_REG_ILLEGAL; + + switch (reg) + { + case REG_R0: + dwarfReg = 0; + break; + case REG_RA: + dwarfReg = 1; + break; + case REG_TP: + dwarfReg = 2; + break; + case REG_SP: + dwarfReg = 3; + break; + case REG_A0: + dwarfReg = 4; + break; + case REG_A1: + dwarfReg = 5; + break; + case REG_A2: + dwarfReg = 6; + break; + case REG_A3: + dwarfReg = 7; + break; + case REG_A4: + dwarfReg = 8; + break; + case REG_A5: + dwarfReg = 9; + break; + case REG_A6: + dwarfReg = 10; + break; + case REG_A7: + dwarfReg = 11; + break; + case REG_T0: + dwarfReg = 12; + break; + case REG_T1: + dwarfReg = 13; + break; + case REG_T2: + dwarfReg = 14; + break; + case REG_T3: + dwarfReg = 15; + break; + case REG_T4: + dwarfReg = 16; + break; + case REG_T5: + dwarfReg = 17; + break; + case REG_T6: + dwarfReg = 18; + break; + case REG_T7: + dwarfReg = 19; + break; + case REG_T8: + dwarfReg = 20; + break; + case REG_X0: + dwarfReg = 21; + break; + case REG_FP: + dwarfReg = 22; + break; + case REG_S0: + dwarfReg = 23; + break; + case REG_S1: + dwarfReg = 24; + break; + case REG_S2: + dwarfReg = 25; + break; + case REG_S3: + dwarfReg = 26; + break; + case REG_S4: + dwarfReg = 27; + break; + case REG_S5: + dwarfReg = 28; + break; + case REG_S6: + dwarfReg = 29; + break; + case REG_S7: + dwarfReg = 30; 
+ break; + case REG_S8: + dwarfReg = 31; + break; + case REG_F0: + dwarfReg = 64; + break; + case REG_F1: + dwarfReg = 65; + break; + case REG_F2: + dwarfReg = 66; + break; + case REG_F3: + dwarfReg = 67; + break; + case REG_F4: + dwarfReg = 68; + break; + case REG_F5: + dwarfReg = 69; + break; + case REG_F6: + dwarfReg = 70; + break; + case REG_F7: + dwarfReg = 71; + break; + case REG_F8: + dwarfReg = 72; + break; + case REG_F9: + dwarfReg = 73; + break; + case REG_F10: + dwarfReg = 74; + break; + case REG_F11: + dwarfReg = 75; + break; + case REG_F12: + dwarfReg = 76; + break; + case REG_F13: + dwarfReg = 77; + break; + case REG_F14: + dwarfReg = 78; + break; + case REG_F15: + dwarfReg = 79; + break; + case REG_F16: + dwarfReg = 80; + break; + case REG_F17: + dwarfReg = 81; + break; + case REG_F18: + dwarfReg = 82; + break; + case REG_F19: + dwarfReg = 83; + break; + case REG_F20: + dwarfReg = 84; + break; + case REG_F21: + dwarfReg = 85; + break; + case REG_F22: + dwarfReg = 86; + break; + case REG_F23: + dwarfReg = 87; + break; + case REG_F24: + dwarfReg = 88; + break; + case REG_F25: + dwarfReg = 89; + break; + case REG_F26: + dwarfReg = 90; + break; + case REG_F27: + dwarfReg = 91; + break; + case REG_F28: + dwarfReg = 92; + break; + case REG_F29: + dwarfReg = 93; + break; + case REG_F30: + dwarfReg = 94; + break; + case REG_F31: + dwarfReg = 95; + break; + + default: + NYI("CFI codes"); + } + + return dwarfReg; +} +#endif // FEATURE_CFI_SUPPORT + +void Compiler::unwindPush(regNumber reg) +{ + unreached(); // use one of the unwindSaveReg* functions instead. 
+} + +void Compiler::unwindAllocStack(unsigned size) +{ +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindAllocStackCFI(size); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + assert(size % 16 == 0); + unsigned x = size / 16; + + if (x <= 0x1F) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review: should say size < 512 + + pu->AddCode((BYTE)x); + } + else if (x <= 0x7F) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + + pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x); + } + else + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + // + // For large stack size, the most significant bits + // are stored first (and next to the opCode) per the unwind spec. + + pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) +{ +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindSetFrameRegCFI(reg, offset); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (offset == 0) + { + assert(reg == REG_FP); + + // set_fp: 11100001 : set up fp : with : move fp, sp + pu->AddCode(0xE1); + } + else + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + + assert(reg == REG_FP); + assert((offset % 8) == 0); + + unsigned x = offset / 8; + assert(x <= 0x1FF); + + pu->AddCode(0xE2, (BYTE)(x >> 8), (BYTE)x); + } +} + +void Compiler::unwindSaveReg(regNumber reg, unsigned offset) +{ + unwindSaveReg(reg, (int)offset); +} + +void Compiler::unwindNop() +{ + UnwindInfo* pu = &funCurrentFunc()->uwi; + +#ifdef DEBUG + if (verbose) + { + printf("unwindNop: adding NOP\n"); + } +#endif + + INDEBUG(pu->uwiAddingNOP = true); + + // nop: 11100011: no unwind operation is 
required. + pu->AddCode(0xE3); + + INDEBUG(pu->uwiAddingNOP = false); +} + +void Compiler::unwindSaveReg(regNumber reg, int offset) +{ + + // st.d reg, sp, offset + + // offset for store in prolog must be positive and a multiple of 8. + assert(0 <= offset && offset <= 2047); + assert((offset % 8) == 0); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset); + } + + return; + } +#endif // TARGET_UNIX + int z = offset / 8; + //assert(0 <= z && z <= 0xFF); + + UnwindInfo* pu = &funCurrentFunc()->uwi; + + if (emitter::isGeneralRegister(reg)) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(reg == REG_RA || reg == REG_FP || // first legal register: RA + (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 + + BYTE x = (BYTE)(reg - REG_RA); + assert(0 <= x && x <= 0x1E); + + pu->AddCode(0xD0, (BYTE)x, (BYTE)z); + } + else + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + + assert(REG_F24 <= reg && // first legal register: F24 + reg <= REG_F31); // last legal register: F31 + + BYTE x = (BYTE)(reg - REG_F24); + assert(0 <= x && x <= 0x7); + + pu->AddCode(0xDC, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); + } +} + +void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) +{ + //TODO:temp not used on loongarch64. + assert(!"unimplemented on LOONGARCH yet"); +#if 0 + UnwindInfo* pu = &funCurrentFunc()->uwi; + + // stp reg1, reg2, [sp, #offset] + + // offset for store pair in prolog must be positive and a multiple of 16. 
+ assert(0 <= offset && offset <= 0xff0); + assert((offset % 16) == 0); + + int z = offset / 8; + //assert(0 <= z && z <= 0x1FE); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg1), offset); + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg2), offset + 8); + } + + return; + } +#endif // TARGET_UNIX + if (reg1 == REG_FP) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 0xff0 + assert(reg2 == REG_RA); + + pu->AddCode(0x40 | (BYTE)(z >> 8), (BYTE)z); + } + else if (reg2 == REG_RA) + { + assert(!"unimplemented on LOONGARCH yet"); + } + else if (emitter::isGeneralRegister(reg1)) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(REG_NEXT(reg1) == reg2); + assert(REG_S0 <= reg1 && // first legal pair: S0, S1 + reg1 <= REG_S6); // last legal pair: S6, S7 (FP is never saved without RA) + + BYTE x = (BYTE)(reg1 - REG_S0); + //assert(0 <= x && x <= 0x6); + + pu->AddCode(0xC8, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } +#endif +} + +void Compiler::unwindReturn(regNumber reg) +{ + // Nothing to do; we will always have at least one trailing "end" opcode in our padding. 
+} + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Debug helpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes + +unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) +{ + static BYTE s_UnwindSize[256] = { + // array of unwind sizes, in bytes (as specified in the LOONGARCH unwind specification) + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF + 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF + 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF + 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF + }; + + unsigned size = s_UnwindSize[b1]; + assert(1 <= size && size <= 4); + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind Info Support Classes XX +XX XX 
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindCodesBase +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef DEBUG + +// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes. +unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog) +{ + BYTE* pCodesStart = GetCodes(); + BYTE* pCodes = pCodesStart; + unsigned size = 0; + for (;;) + { + BYTE b1 = *pCodes; + if (IsEndCode(b1)) + { + break; // We hit an "end" code; we're done + } + size += 4; // All codes represent 4 byte instructions. + pCodes += GetUnwindSizeFromUnwindHeader(b1); + assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed + } + return size; +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Debug dumpers XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#ifdef DEBUG + +// start is 0-based index from LSB, length is number of bits +DWORD ExtractBits(DWORD dw, DWORD start, DWORD length) +{ + return (dw >> start) & ((1 << length) - 1); +} + +// Dump the unwind data. 
+// Arguments: +// isHotCode: true if this unwind data is for the hot section +// startOffset: byte offset of the code start that this unwind data represents +// endOffset: byte offset of the code end that this unwind data represents +// pHeader: pointer to the unwind data blob +// unwindBlockSize: size in bytes of the unwind data blob + +void DumpUnwindInfo(Compiler* comp, + bool isHotCode, + UNATIVE_OFFSET startOffset, + UNATIVE_OFFSET endOffset, + const BYTE* const pHeader, + ULONG unwindBlockSize) +{ + printf("Unwind Info%s:\n", isHotCode ? "" : " COLD"); + + // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end + // to provide padding, and round down to get a multiple of 4 bytes in size. + DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader; + DWORD dw; + + dw = *pdw++; + + DWORD codeWords = ExtractBits(dw, 27, 5); + DWORD epilogCount = ExtractBits(dw, 22, 5); + DWORD EBit = ExtractBits(dw, 21, 1); + DWORD XBit = ExtractBits(dw, 20, 1); + DWORD Vers = ExtractBits(dw, 18, 2); + DWORD functionLength = ExtractBits(dw, 0, 18); + + printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset)); + printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset)); + printf(" Code Words : %u\n", codeWords); + printf(" Epilog Count : %u\n", epilogCount); + printf(" E bit : %u\n", EBit); + printf(" X bit : %u\n", XBit); + printf(" Vers : %u\n", Vers); + printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength, + functionLength * 4, functionLength * 4); + + assert(functionLength * 4 == endOffset - startOffset); + + if (codeWords == 0 && epilogCount == 0) + { + // We have an extension word specifying a larger number of Code Words or Epilog Counts + // than can be specified in the header word. 
+ + dw = *pdw++; + + codeWords = ExtractBits(dw, 16, 8); + epilogCount = ExtractBits(dw, 0, 16); + assert((dw & 0xF0000000) == 0); // reserved field should be zero + + printf(" ---- Extension word ----\n"); + printf(" Extended Code Words : %u\n", codeWords); + printf(" Extended Epilog Count : %u\n", epilogCount); + } + + bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false + + if (EBit == 0) + { + // We have an array of epilog scopes + + printf(" ---- Epilog scopes ----\n"); + if (epilogCount == 0) + { + printf(" No epilogs\n"); + } + else + { + for (DWORD scope = 0; scope < epilogCount; scope++) + { + dw = *pdw++; + + DWORD epilogStartOffset = ExtractBits(dw, 0, 18); + DWORD res = ExtractBits(dw, 18, 4); + DWORD epilogStartIndex = ExtractBits(dw, 22, 10); + + // Note that epilogStartOffset for a funclet is the offset from the beginning + // of the current funclet, not the offset from the beginning of the main function. + // To help find it when looking through JitDump output, also show the offset from + // the beginning of the main function. 
+ DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset; + + assert(res == 0); + + printf(" ---- Scope %d\n", scope); + printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main " + "function begin = %u (0x%06x)\n", + comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset), + comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin), + comp->dspOffset(epilogStartOffsetFromMainFunctionBegin)); + printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex); + + epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes + } + } + } + else + { + printf(" --- One epilog, unwind codes at %u\n", epilogCount); + assert(epilogCount < ArrLen(epilogStartAt)); + epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset + } + + // Dump the unwind codes + + printf(" ---- Unwind codes ----\n"); + + DWORD countOfUnwindCodes = codeWords * 4; + PBYTE pUnwindCode = (PBYTE)pdw; + BYTE b1, b2, b3, b4; + DWORD x, z; + for (DWORD i = 0; i < countOfUnwindCodes; i++) + { + // Does this byte start an epilog sequence? If so, note that fact. 
+ if (epilogStartAt[i]) + { + printf(" ---- Epilog start at index %u ----\n", i); + } + + b1 = *pUnwindCode++; + + if ((b1 & 0xE0) == 0) + { + // alloc_s: 000xxxxx: allocate small stack with size < 128 (2^5 * 16) + // TODO-Review:should say size < 512 + x = b1 & 0x1F; + printf(" %02X alloc_s #%u (0x%02X); addi.d sp, sp, -%u (0x%03X)\n", b1, x, x, x * 16, x * 16); + } +#if 0 + else if ((b1 & 0xE0) == 0x20) + { + // save_s0s1_x: 001zzzzz: save pair at [sp-#Z*8]!, pre-indexed offset >= -248 + z = b1 & 0x1F; + printf(" %02X save_s0s1_x #%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, z, z, + getRegName(REG_S0), getRegName(REG_S1), z * 8); + } + else if ((b1 & 0xF0) == 0x40) + { + // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra #%u (0x%03X); Two sd %s, %s, [sp, #%u]\n", b1, b2, z, z, getRegName(REG_FP), + getRegName(REG_RA), z * 8); + } + else if ((b1 & 0xF0) == 0x80) + { + // save_fpra_x: 1000zzzz | zzzzzzzz: save pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + z = ((DWORD)(b1 & 0xF) << 8) | (DWORD)b2; + printf(" %02X %02X save_fpra_x #%u (0x%03X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, z, z, + getRegName(REG_FP), getRegName(REG_RA), (z + 1) * 8); + } +#endif + else if ((b1 & 0xF8) == 0xC0) + { + // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 2k (2^7 * 16) + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2; + + printf(" %02X %02X alloc_m #%u (0x%03X); addi.d sp, sp, -%u (0x%04X)\n", b1, b2, x, x, x * 16, + x * 16); + } + else if (b1 == 0xD0) + { + // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = 
(DWORD)b2; + z = (DWORD)b3; + + printf(" %02X %02X %02X save_reg X#%u Z#%u (0x%02X); st.d %s, sp, %u\n", b1, b2, b3, x, z, z, + getRegName(REG_RA + x), z * 8); + } +#if 0 + else if (b1 == 0xC8) + { + // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), z * 8); + } + else if (b1 == 0xCC) + { + // save_regp_x: 11001100 | 0xxxzzzz | zzzzzzzz: save pair s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i+= 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_regp_x X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), getRegName(REG_S0 + x + 1), (z + 1) * 8); + } + else if ((b1 & 0xFE) == 0xD4) + { + // save_reg_x: 1101010x | xxxzzzzz: save reg s(0 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); sd %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_S0 + x), (z + 1) * 8); + } + else if (b1 == 0xD6) + { + // save_rapair: 11010110 | 0xxxzzzz | zzzzzzzz: save pair at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_lrpair X#%u Z#%u (0x%02X); Two sd %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_S0 + x), 
getRegName(REG_RA), z * 8); + } + else if (b1 == 0xD8) + { + // save_fregp: 11011000 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X) at [sp + #Z * 8], offset <= 32767 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), z * 8); + } + else if (b1 == 0xDA) + { + // save_fregp_x: 11011010 | 0xxxzzzz | zzzzzzzz : save pair f(24 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= + // -32768 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_fregp_x X#%u Z#%u (0x%02X); Two sdc1 %s, %s, [sp, #-%u]!\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x, true), getRegName(REG_F24 + x + 1, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xDC) + { + // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = (DWORD)(b2 >> 4); + z = ((DWORD)(b2 & 0xF) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X save_freg X#%u Z#%u (0x%02X); fst.d %s, [sp, #%u]\n", b1, b2, b3, x, z, z, + getRegName(REG_F24 + x), z * 8); + } +#if 0 + else if (b1 == 0xDE) + { + // save_freg_x: 11011110 | xxxzzzzz : save reg f(24 + #X) at [sp - (#Z + 1) * 8]!, pre - indexed offset >= + // -16384 + assert(i + 1 < countOfUnwindCodes); + b2 = *pUnwindCode++; + i++; + + x = (DWORD)(b2 >> 5); + z = (DWORD)(b2 & 0x1F); + + printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); sdc1 %s, [sp, #-%u]!\n", b1, b2, x, z, z, + getRegName(REG_F24 + x, true), (z + 1) * 8); + } +#endif + else if (b1 == 0xE0) + { + // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | 
xxxxxxxx : allocate large stack with size < 256M (2^24 * 16) + assert(i + 3 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + b4 = *pUnwindCode++; + i += 3; + + x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4; + + printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); addi.d sp, sp, -%u (%06X)\n", b1, b2, b3, b4, x, x, + x * 16, x * 16); + } + else if (b1 == 0xE1) + { + // set_fp: 11100001 : set up $29 : with : move fp, sp + + printf(" %02X set_fp; move %s, sp\n", b1, getRegName(REG_FP)); + } + else if (b1 == 0xE2) + { + // add_fp: 11100010 | 000xxxxx | xxxxxxxx : set up fp with : addi.d fp, sp, #x * 8 + assert(i + 2 < countOfUnwindCodes); + b2 = *pUnwindCode++; + b3 = *pUnwindCode++; + i += 2; + + x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; + + printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, getRegName(REG_FP), + x * 8); + } + else if (b1 == 0xE3) + { + // nop: 11100011: no unwind operation is required. + + printf(" %02X nop\n", b1); + } + else if (b1 == 0xE4) + { + // end: 11100100 : end of unwind code + + printf(" %02X end\n", b1); + } + else if (b1 == 0xE5) + { + // end_c: 11100101 : end of unwind code in current chained scope. + + printf(" %02X end_c\n", b1); + } + else if (b1 == 0xE6) + { + // save_next: 11100110 : save next non - volatile Int or FP register pair. 
+ + printf(" %02X save_next\n", b1); + } + else + { + printf("===========[loongarch64] Unknown / reserved unwind code: %02X\n", b1); + // Unknown / reserved unwind code + assert(!"Internal error decoding unwind codes"); + } + } + + pdw += codeWords; + assert((PBYTE)pdw == pUnwindCode); + assert((PBYTE)pdw == pHeader + unwindBlockSize); + + assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA + + printf("\n"); +} + +#endif // DEBUG + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Unwind APIs XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +void Compiler::unwindBegProlog() +{ + assert(compGeneratingProlog); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + unwindBegPrologCFI(); + return; + } +#endif // TARGET_UNIX + + FuncInfoDsc* func = funCurrentFunc(); + + // There is only one prolog for a function/funclet, and it comes first. So now is + // a good time to initialize all the unwind data structures. 
+ + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, true, &startLoc, &endLoc); + + func->uwi.InitUnwindInfo(this, startLoc, endLoc); + func->uwi.CaptureLocation(); + + func->uwiCold = NULL; // No cold data yet +} + +void Compiler::unwindEndProlog() +{ + assert(compGeneratingProlog); +} + +void Compiler::unwindBegEpilog() +{ + assert(compGeneratingEpilog); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // TARGET_UNIX + + funCurrentFunc()->uwi.AddEpilog(); +} + +void Compiler::unwindEndEpilog() +{ + assert(compGeneratingEpilog); +} + +// The instructions between the last captured "current state" and the current instruction +// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes +// for them. +void Compiler::unwindPadding() +{ +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); +} + +// Ask the VM to reserve space for the unwind information for the function and +// all its funclets. +void Compiler::unwindReserve() +{ + assert(!compGeneratingProlog); + assert(!compGeneratingEpilog); + + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindReserveFunc(funGetFunc(funcIdx)); + } +} + +void Compiler::unwindReserveFunc(FuncInfoDsc* func) +{ + BOOL isFunclet = (func->funKind == FUNC_ROOT) ? 
FALSE : TRUE; + bool funcHasColdSection = false; + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + DWORD unwindCodeBytes = 0; + if (fgFirstColdBlock != nullptr) + { + eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes); + } + unwindCodeBytes = (DWORD)(func->cfiCodes->size() * sizeof(CFI_CODE)); + eeReserveUnwindInfo(isFunclet, false /*isColdCode*/, unwindCodeBytes); + + return; + } +#endif // TARGET_UNIX + + // If there is cold code, split the unwind data between the hot section and the + // cold section. This needs to be done before we split into fragments, as each + // of the hot and cold sections can have multiple fragments. + + if (fgFirstColdBlock != NULL) + { + assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH + + emitLocation* startLoc; + emitLocation* endLoc; + unwindGetFuncLocations(func, false, &startLoc, &endLoc); + + func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo(); + func->uwiCold->InitUnwindInfo(this, startLoc, endLoc); + func->uwiCold->HotColdSplitCodes(&func->uwi); + + funcHasColdSection = true; + } + + // First we need to split the function or funclet into fragments that are no larger + // than 512K, so the fragment size will fit in the unwind data "Function Length" field. + // The LOONGARCH Exception Data specification "Function Fragments" section describes this. + func->uwi.Split(); + + func->uwi.Reserve(isFunclet, true); + + // After the hot section, split and reserve the cold section + + if (funcHasColdSection) + { + assert(func->uwiCold != NULL); + + func->uwiCold->Split(); + func->uwiCold->Reserve(isFunclet, false); + } +} + +// unwindEmit: Report all the unwind information to the VM. +// Arguments: +// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code +// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code. 
+ +void Compiler::unwindEmit(void* pHotCode, void* pColdCode) +{ + assert(compFuncInfoCount > 0); + for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++) + { + unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode); + } +} + +void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode) +{ + // Verify that the JIT enum is in sync with the JIT-EE interface enum + static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT); + static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); + static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); + +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + unwindEmitFuncCFI(func, pHotCode, pColdCode); + return; + } +#endif // TARGET_UNIX + + func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); + + if (func->uwiCold != NULL) + { + func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false); + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindPrologCodes +// +/////////////////////////////////////////////////////////////////////////////// + +// We're going to use the prolog codes memory to store the final unwind data. +// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then +// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes. +// Set the header pointer for future use, adding the header bytes (this pointer +// is updated when a header byte is added), and remember the index that points +// to the beginning of the header. + +void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes) +{ +#ifdef DEBUG + // We're done adding codes. Check that we didn't accidentally create a bigger prolog. 
+ unsigned codeSize = GetCodeSizeFromUnwindCodes(true); + assert(codeSize <= MAX_PROLOG_SIZE_BYTES); +#endif // DEBUG + + int prologBytes = Size(); + + EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment + + upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header + + assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3); + + upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing + assert(upcHeaderSlot >= -1); + + if (epilogBytes > 0) + { + // The prolog codes that are already at the end of the array need to get moved to the middle, + // with space for the non-matching epilog codes to follow. + + memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes), + &upcMem[upcCodeSlot], prologBytes); + + // Note that the three UWC_END padding bytes still exist at the end of the array. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes. + memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes); +#endif // DEBUG + + upcEpilogSlot = + upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill + + // Update upcCodeSlot to point at the new beginning of the prolog codes + upcCodeSlot = upcUnwindBlockSlot + headerBytes; + } +} + +// Add a header word. Header words are added starting at the beginning, in order: first to last. +// This is in contrast to the prolog unwind codes, which are added in reverse order. +void UnwindPrologCodes::AddHeaderWord(DWORD d) +{ + assert(-1 <= upcHeaderSlot); + assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there! + + // Store it byte-by-byte in little-endian format. 
We've already ensured there is enough space + // in SetFinalSize(). + upcMem[++upcHeaderSlot] = (BYTE)d; + upcMem[++upcHeaderSlot] = (BYTE)(d >> 8); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 16); + upcMem[++upcHeaderSlot] = (BYTE)(d >> 24); +} + +// AppendEpilog: copy the epilog bytes to the next epilog bytes slot +void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi) +{ + assert(upcEpilogSlot != -1); + + int epiSize = pEpi->Size(); + memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(), + epiSize); // -3 to avoid writing to the alignment padding + assert(pEpi->GetStartIndex() == + upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it. + + upcEpilogSlot += epiSize; + assert(upcEpilogSlot <= upcMemSize - 3); +} + +// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes +void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize) +{ + assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data! + + *ppUnwindBlock = &upcMem[upcUnwindBlockSlot]; + + // We put 4 'end' codes at the end for padding, so we can ensure we have an + // unwind block that is a multiple of 4 bytes in size. Subtract off three 'end' + // codes (leave one), and then align the size up to a multiple of 4. + *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD)); +} + +int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) +{ + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = 0;//Size() - pEpi->Size(); + + BYTE* pProlog = GetCodes(); + BYTE* pEpilog = pEpi->GetCodes(); + + //First check set_fp. 
+ if (0 < pEpi->Size()) + { + if (*pProlog == 0xE1) + { + pProlog++; + if (*pEpilog == 0xE1) + { + pEpilog++; + } + else + { + matchIndex = 1; + } + } + else if (*pProlog == 0xE2) + { + pProlog += 3; + if (*pEpilog == 0xE1) + { + pEpilog += 3; + } + else + { + matchIndex = 3; + } + } + } + + if (0 == memcmp(pProlog, pEpilog, pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +// Copy the prolog codes from another prolog. The only time this is legal is +// if we are at the initial state and no prolog codes have been added. +// This is used to create the 'phantom' prolog for non-first fragments. + +void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom) +{ + assert(uwiComp == pCopyFrom->uwiComp); + assert(upcMem == upcMemLocal); + assert(upcMemSize == UPC_LOCAL_COUNT); + assert(upcHeaderSlot == -1); + assert(upcEpilogSlot == -1); + + // Copy the codes + EnsureSize(pCopyFrom->upcMemSize); + assert(upcMemSize == pCopyFrom->upcMemSize); + memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize); + + // Copy the other data + upcCodeSlot = pCopyFrom->upcCodeSlot; + upcHeaderSlot = pCopyFrom->upcHeaderSlot; + upcEpilogSlot = pCopyFrom->upcEpilogSlot; + upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot; +} + +void UnwindPrologCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > upcMemSize) + { + // Reallocate, and copy everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! 
+ int newSize; + for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem, + upcMemSize); // copy the existing data to the end +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(upcMem, 0xFF, upcMemSize); +#endif // DEBUG + upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + upcCodeSlot += newSize - upcMemSize; + upcMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindPrologCodes::Dump(int indent) +{ + printf("%*sUnwindPrologCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &upcMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&upcMemLocal[0])); + printf("%*s upcMem: 0x%08p\n", indent, "", dspPtr(upcMem)); + printf("%*s upcMemSize: %d\n", indent, "", upcMemSize); + printf("%*s upcCodeSlot: %d\n", indent, "", upcCodeSlot); + printf("%*s upcHeaderSlot: %d\n", indent, "", upcHeaderSlot); + printf("%*s upcEpilogSlot: %d\n", indent, "", upcEpilogSlot); + printf("%*s upcUnwindBlockSlot: %d\n", indent, "", upcUnwindBlockSlot); + + if (upcMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < upcMemSize; i++) + { + printf(" %02x", upcMem[i]); + if (i == upcCodeSlot) + printf(" <-C"); + else if (i == upcHeaderSlot) + printf(" <-H"); + else if (i == upcEpilogSlot) + printf(" <-E"); + else if (i == upcUnwindBlockSlot) + printf(" <-U"); + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogCodes +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindEpilogCodes::EnsureSize(int requiredSize) +{ + if (requiredSize > uecMemSize) + { + // Reallocate, and copy 
everything to a new array. + + // Choose the next power of two size. This may or may not be the best choice. + noway_assert((requiredSize & 0xC0000000) == 0); // too big! + int newSize; + for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1) + { + // do nothing + } + + BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize]; + memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize); +#ifdef DEBUG + // Clear the old unwind codes; nobody should be looking at them + memset(uecMem, 0xFF, uecMemSize); +#endif // DEBUG + uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator + // uecCodeSlot stays the same + uecMemSize = newSize; + } +} + +#ifdef DEBUG +void UnwindEpilogCodes::Dump(int indent) +{ + printf("%*sUnwindEpilogCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s &uecMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&uecMemLocal[0])); + printf("%*s uecMem: 0x%08p\n", indent, "", dspPtr(uecMem)); + printf("%*s uecMemSize: %d\n", indent, "", uecMemSize); + printf("%*s uecCodeSlot: %d\n", indent, "", uecCodeSlot); + printf("%*s uecFinalized: %s\n", indent, "", dspBool(uecFinalized)); + + if (uecMemSize > 0) + { + printf("%*s codes:", indent, ""); + for (int i = 0; i < uecMemSize; i++) + { + printf(" %02x", uecMem[i]); + if (i == uecCodeSlot) + printf(" <-C"); // Indicate the current pointer + } + printf("\n"); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindEpilogInfo +// +/////////////////////////////////////////////////////////////////////////////// + +// Do the current unwind codes match those of the argument epilog? +// If they don't match, return -1. If they do, return the offset into +// our codes at which the argument codes match. Note that this means that +// the argument codes can match a subset of our codes. 
The subset needs to be at +// the end, for the "end" code to match. +// +// Note that if we wanted to handle 0xFD and 0xFE codes, by converting +// an existing 0xFF code to one of those, we might do that here. + +int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi) +{ + if (Matches()) + { + // We are already matched to someone else, and won't provide codes to the final layout + return -1; + } + + if (Size() < pEpi->Size()) + { + return -1; + } + + int matchIndex = Size() - pEpi->Size(); + + if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size())) + { + return matchIndex; + } + + return -1; +} + +void UnwindEpilogInfo::CaptureEmitLocation() +{ + noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog + epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation(); + epiEmitLocation->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindEpilogInfo::FinalizeOffset() +{ + epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->GetEmitter()); +} + +#ifdef DEBUG +void UnwindEpilogInfo::Dump(int indent) +{ + printf("%*sUnwindEpilogInfo @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s epiNext: 0x%08p\n", indent, "", dspPtr(epiNext)); + printf("%*s epiEmitLocation: 0x%08p\n", indent, "", dspPtr(epiEmitLocation)); + printf("%*s epiStartOffset: 0x%x\n", indent, "", epiStartOffset); + printf("%*s epiMatches: %s\n", indent, "", dspBool(epiMatches)); + printf("%*s epiStartIndex: %d\n", indent, "", epiStartIndex); + + epiCodes.Dump(indent + 2); +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindFragmentInfo +// +/////////////////////////////////////////////////////////////////////////////// + +UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog) + : UnwindBase(comp) + , ufiNext(NULL) + , ufiEmitLoc(emitLoc) + , 
ufiHasPhantomProlog(hasPhantomProlog) + , ufiPrologCodes(comp) + , ufiEpilogFirst(comp) + , ufiEpilogList(NULL) + , ufiEpilogLast(NULL) + , ufiCurCodes(&ufiPrologCodes) + , ufiSize(0) + , ufiStartOffset(UFI_ILLEGAL_OFFSET) +{ +#ifdef DEBUG + ufiNum = 1; + ufiInProlog = true; + ufiInitialized = UFI_INITIALIZED_PATTERN; +#endif // DEBUG +} + +void UnwindFragmentInfo::FinalizeOffset() +{ + if (ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + ufiStartOffset = 0; + } + else + { + ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); + } +} + +void UnwindFragmentInfo::AddEpilog() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (ufiInProlog) + { + assert(ufiEpilogList == NULL); + ufiInProlog = false; + } + else + { + assert(ufiEpilogList != NULL); + } +#endif // DEBUG + + // Either allocate a new epilog object, or, for the first one, use the + // preallocated one that is a member of the UnwindFragmentInfo class. + + UnwindEpilogInfo* newepi; + + if (ufiEpilogList == NULL) + { + // Use the epilog that's in the class already. Be sure to initialize it! + newepi = ufiEpilogList = &ufiEpilogFirst; + } + else + { + newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp); + } + + // Put the new epilog at the end of the epilog list + + if (ufiEpilogLast != NULL) + { + ufiEpilogLast->epiNext = newepi; + } + + ufiEpilogLast = newepi; + + // What is the starting code offset of the epilog? Store an emitter location + // so we can ask the emitter later, after codegen. + + newepi->CaptureEmitLocation(); + + // Put subsequent unwind codes in this new epilog + + ufiCurCodes = &newepi->epiCodes; +} + +// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will +// become 'phantom' prolog codes in this fragment. 
Note that this fragment should +// not have any prolog codes currently; it is at the initial state. + +void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom) +{ + ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes); + ufiPrologCodes.AddCode(UWC_END_C); +} + +// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent +// epilogs that start at or after the location represented by 'emitLoc' are removed +// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have +// any epilog codes currently; it is at the initial state. + +void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom) +{ + UnwindEpilogInfo* pEpiPrev; + UnwindEpilogInfo* pEpi; + + UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->GetEmitter()); + + for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext) + { + pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it + if (pEpi->GetStartOffset() >= splitOffset) + { + // This epilog and all following epilogs, which must be in order of increasing offsets, + // get moved to this fragment. + + // Splice in the epilogs to this fragment. Set the head of the epilog + // list to this epilog. + ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst' + ufiEpilogLast = pSplitFrom->ufiEpilogLast; + + // Splice out the tail of the list from the 'pSplitFrom' epilog list + pSplitFrom->ufiEpilogLast = pEpiPrev; + if (pSplitFrom->ufiEpilogLast == NULL) + { + pSplitFrom->ufiEpilogList = NULL; + } + else + { + pSplitFrom->ufiEpilogLast->epiNext = NULL; + } + + // No more codes should be added once we start splitting + pSplitFrom->ufiCurCodes = NULL; + ufiCurCodes = NULL; + + break; + } + } +} + +// Is this epilog at the end of an unwind fragment? Ask the emitter. 
+// Note that we need to know this before all code offsets are finalized, +// so we can determine whether we can omit an epilog scope word for a +// single matching epilog. + +bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi) +{ + return uwiComp->GetEmitter()->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc); +} + +// Merge the unwind codes as much as possible. +// This function is called before all offsets are final. +// Also, compute the size of the final unwind block. Store this +// and some other data for later, when we actually emit the +// unwind block. + +void UnwindFragmentInfo::MergeCodes() +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + + unsigned epilogCount = 0; + unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the + // prolog codes + unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes + UnwindEpilogInfo* pEpi; + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++epilogCount; + + pEpi->FinalizeCodes(); + + // Does this epilog match the prolog? + // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal + // prolog and epilog codes. + + int matchIndex; + + matchIndex = ufiPrologCodes.Match(pEpi); + if (matchIndex != -1) + { + pEpi->SetMatches(); + pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index + } + else + { + // The epilog codes don't match the prolog codes. Do they match any of the epilogs + // we've seen so far? + + bool matched = false; + for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext) + { + matchIndex = pEpi2->Match(pEpi); + if (matchIndex != -1) + { + // Use the same epilog index as the one we matched, as it has already been set. 
+ pEpi->SetMatches(); + pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's + // codes, in which case matchIndex > 0 + matched = true; + break; + } + } + + if (!matched) + { + pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location + epilogCodeBytes += pEpi->Size(); + epilogIndex += pEpi->Size(); + } + } + } + + DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes; + codeBytes = AlignUp(codeBytes, sizeof(DWORD)); + + DWORD codeWords = + codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block + + // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"? + + bool needExtendedCodeWordsEpilogCount = + (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT); + + // How many epilog scope words do we need? + + bool setEBit = false; // do we need to set the E bit? + unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs! + + if (epilogCount == 1) + { + assert(ufiEpilogList != NULL); + assert(ufiEpilogList->epiNext == NULL); + + if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog + !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList)) + { + epilogScopes = 0; // Don't need any epilog scope words + setEBit = true; + } + } + + DWORD headerBytes = (1 // Always need first header DWORD + + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code + // Words or Extended Epilog Count? + + epilogScopes // One DWORD per epilog scope, for EBit = 0 + ) * + sizeof(DWORD); // convert it to bytes + + DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words, + // including end padding if necessary + + // Construct the final unwind information. 
+ + // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs + // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match + // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the + // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog + // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we + // have that much memory, and then copy the prolog codes to the right place, appending the non-matching + // epilog codes and prepending the header. + + ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes); + + if (epilogCodeBytes != 0) + { + // We need to copy the epilog code bytes to their final memory location + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + if (!pEpi->Matches()) + { + ufiPrologCodes.AppendEpilog(pEpi); + } + } + } + + // Save some data for later + ufiSize = finalSize; + ufiSetEBit = setEBit; + ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount; + ufiCodeWords = codeWords; + ufiEpilogScopes = epilogScopes; +} + +// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header. + +void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) +{ + assert(ufiInitialized == UFI_INITIALIZED_PATTERN); + +#ifdef DEBUG + if (0 && uwiComp->verbose) + { + printf("*************** Before fragment #%d finalize\n", ufiNum); + Dump(); + } +#endif + +// Compute the header + + noway_assert((functionLength & 3) == 0); + DWORD headerFunctionLength = functionLength / 4; + + DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. + DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. + DWORD headerEBit; + DWORD headerEpilogCount; // This depends on how we set headerEBit. 
+ DWORD headerCodeWords; + DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. + DWORD headerExtendedCodeWords = 0; + + if (ufiSetEBit) + { + headerEBit = 1; + headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes! + headerCodeWords = ufiCodeWords; + } + else + { + headerEBit = 0; + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + headerEpilogCount = 0; + headerCodeWords = 0; + headerExtendedEpilogCount = ufiEpilogScopes; + headerExtendedCodeWords = ufiCodeWords; + } + else + { + headerEpilogCount = ufiEpilogScopes; + headerCodeWords = ufiCodeWords; + } + } + + // Start writing the header + + noway_assert(headerFunctionLength <= + 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error + + if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) | + (headerEpilogCount << 22) | (headerCodeWords << 27); + + ufiPrologCodes.AddHeaderWord(header); + + // Construct the second header word, if needed + + if (ufiNeedExtendedCodeWordsEpilogCount) + { + noway_assert(headerEBit == 0); + noway_assert(headerEpilogCount == 0); + noway_assert(headerCodeWords == 0); + noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT)); + + if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) || + (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16); + + ufiPrologCodes.AddHeaderWord(header2); + } + + // Construct the epilog scope words, if needed + + if (!ufiSetEBit) + { + for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + // The epilog 
must strictly follow the prolog. The prolog is in the first fragment of + // the hot section. If this epilog is at the start of a fragment, it can't be the + // first fragment in the hot section. We actually don't know if we're processing + // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus, + // we just assert that the epilog starts within the fragment. + assert(pEpi->GetStartOffset() >= GetStartOffset()); + + // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment, + // NOT the offset from the beginning of the main function. + DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset(); + + noway_assert((headerEpilogStartOffset & 3) == 0); + headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits + // of the actual offset is always zero) + + DWORD headerEpilogStartIndex = pEpi->GetStartIndex(); + + if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) || + (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX)) + { + IMPL_LIMITATION("unwind data too large"); + } + + DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22); + + ufiPrologCodes.AddHeaderWord(epilogScopeWord); + } + } + + // The unwind code words are already here, following the header, so we're done! +} + +void UnwindFragmentInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH + + MergeCodes(); + + BOOL isColdCode = isHotCode ? FALSE : TRUE; + + ULONG unwindSize = Size(); + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("reserveUnwindInfo: fragment #%d:\n", ufiNum); + } +#endif + + uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize); +} + +// Allocate the unwind info for a fragment with the VM. 
+// Arguments: +// funKind: funclet kind +// pHotCode: hot section code buffer +// pColdCode: cold section code buffer +// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a +// function/funclet. +// isHotCode: are we allocating the unwind info for the hot code section? + +void UnwindFragmentInfo::Allocate( + CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode) +{ + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + // We don't support hot/cold splitting with EH, so if there is cold code, this + // better not be a funclet! + // TODO-CQ: support funclets in cold code + + noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT); + + // Compute the final size, and start and end offsets of the fragment + + startOffset = GetStartOffset(); + + if (ufiNext == NULL) + { + // This is the last fragment, so the fragment extends to the end of the function/fragment. + assert(funcEndOffset != 0); + endOffset = funcEndOffset; + } + else + { + // The fragment length is all the code between the beginning of this fragment + // and the beginning of the next fragment. Note that all fragments have had their + // offsets computed before any fragment is allocated. + endOffset = ufiNext->GetStartOffset(); + } + + assert(endOffset > startOffset); + codeSize = endOffset - startOffset; + + // Finalize the fragment unwind block to hand to the VM + + Finalize(codeSize); + + // Get the final unwind information and hand it to the VM + + ULONG unwindBlockSize; + BYTE* pUnwindBlock; + + GetFinalInfo(&pUnwindBlock, &unwindBlockSize); + +#ifdef DEBUG + if (uwiComp->opts.dspUnwind) + { + DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize); + } +#endif // DEBUG + + // Adjust for cold or hot code: + // 1. The VM doesn't want the cold code pointer unless this is cold code. + // 2. 
The startOffset and endOffset need to be from the base of the hot section for hot code + // and from the base of the cold section for cold code + + if (isHotCode) + { + assert(endOffset <= uwiComp->info.compTotalHotCodeSize); + pColdCode = NULL; + } + else + { + assert(startOffset >= uwiComp->info.compTotalHotCodeSize); + startOffset -= uwiComp->info.compTotalHotCodeSize; + endOffset -= uwiComp->info.compTotalHotCodeSize; + } + +#ifdef DEBUG + if (uwiComp->verbose) + { + if (ufiNum != 1) + printf("unwindEmit: fragment #%d:\n", ufiNum); + } +#endif // DEBUG + + uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock, + funKind); +} + +#ifdef DEBUG +void UnwindFragmentInfo::Dump(int indent) +{ + unsigned count; + UnwindEpilogInfo* pEpi; + + count = 0; + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + ++count; + } + + printf("%*sUnwindFragmentInfo #%d, @0x%08p, size:%d:\n", indent, "", ufiNum, dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s ufiNext: 0x%08p\n", indent, "", dspPtr(ufiNext)); + printf("%*s ufiEmitLoc: 0x%08p\n", indent, "", dspPtr(ufiEmitLoc)); + printf("%*s ufiHasPhantomProlog: %s\n", indent, "", dspBool(ufiHasPhantomProlog)); + printf("%*s %d epilog%s\n", indent, "", count, (count != 1) ? 
"s" : ""); + printf("%*s ufiEpilogList: 0x%08p\n", indent, "", dspPtr(ufiEpilogList)); + printf("%*s ufiEpilogLast: 0x%08p\n", indent, "", dspPtr(ufiEpilogLast)); + printf("%*s ufiCurCodes: 0x%08p\n", indent, "", dspPtr(ufiCurCodes)); + printf("%*s ufiSize: %u\n", indent, "", ufiSize); + printf("%*s ufiSetEBit: %s\n", indent, "", dspBool(ufiSetEBit)); + printf("%*s ufiNeedExtendedCodeWordsEpilogCount: %s\n", indent, "", dspBool(ufiNeedExtendedCodeWordsEpilogCount)); + printf("%*s ufiCodeWords: %u\n", indent, "", ufiCodeWords); + printf("%*s ufiEpilogScopes: %u\n", indent, "", ufiEpilogScopes); + printf("%*s ufiStartOffset: 0x%x\n", indent, "", ufiStartOffset); + printf("%*s ufiInProlog: %s\n", indent, "", dspBool(ufiInProlog)); + printf("%*s ufiInitialized: 0x%08x\n", indent, "", ufiInitialized); + + ufiPrologCodes.Dump(indent + 2); + + for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext) + { + pEpi->Dump(indent + 2); + } +} +#endif // DEBUG + +/////////////////////////////////////////////////////////////////////////////// +// +// UnwindInfo +// +/////////////////////////////////////////////////////////////////////////////// + +void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc) +{ + uwiComp = comp; + + // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated. + // However, its constructor needs to be explicitly called, since the constructor for + // UnwindInfo is not called. + + new (&uwiFragmentFirst, jitstd::placement_t()) UnwindFragmentInfo(comp, startLoc, false); + + uwiFragmentLast = &uwiFragmentFirst; + + uwiEndLoc = endLoc; + + // Allocate an emitter location object. It is initialized to something + // invalid: it has a null 'ig' that needs to get set before it can be used. 
+ // Note that when we create an UnwindInfo for the cold section, this never + // gets initialized with anything useful, since we never add unwind codes + // to the cold section; we simply distribute the existing (previously added) codes. + uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation(); + +#ifdef DEBUG + uwiInitialized = UWI_INITIALIZED_PATTERN; + uwiAddingNOP = false; +#endif // DEBUG +} + +// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi') +// and those that are in the cold section (move them to 'this'). There is exactly one fragment +// in each UnwindInfo; the fragments haven't been split for size, yet. + +void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi) +{ + // Ensure that there is exactly a single fragment in both the hot and the cold sections + assert(&uwiFragmentFirst == uwiFragmentLast); + assert(&puwi->uwiFragmentFirst == puwi->uwiFragmentLast); + assert(uwiFragmentLast->ufiNext == NULL); + assert(puwi->uwiFragmentLast->ufiNext == NULL); + + // The real prolog is in the hot section, so this, cold, section has a phantom prolog + uwiFragmentLast->ufiHasPhantomProlog = true; + uwiFragmentLast->CopyPrologCodes(puwi->uwiFragmentLast); + + // Now split the epilog codes + uwiFragmentLast->SplitEpilogCodes(uwiFragmentLast->ufiEmitLoc, puwi->uwiFragmentLast); +} + +// Split the function or funclet into fragments that are no larger than 512K, +// so the fragment size will fit in the unwind data "Function Length" field. +// The LOONGARCH Exception Data specification "Function Fragments" section describes this. +// We split the function so that it is no larger than 512K bytes, or the value of +// the COMPlus_JitSplitFunctionSize value, if defined (and smaller). We must determine +// how to split the function/funclet before we issue the instructions, so we can +// reserve the unwind space with the VM. The instructions issued may shrink (but not +// expand!) 
during issuing (although this is extremely rare in any case, and may not +// actually occur on LOONGARCH), so we don't finalize actual sizes or offsets. +// +// LOONGARCH64 has very similar limitations, except functions can be up to 1MB. TODO-LOONGARCH64-Bug?: make sure this works! +// +// We don't split any prolog or epilog. Ideally, we might not split an instruction, +// although that doesn't matter because the unwind at any point would still be +// well-defined. + +void UnwindInfo::Split() +{ + UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes + + maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES; + +#ifdef DEBUG + // Consider COMPlus_JitSplitFunctionSize + unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize(); + + if (splitFunctionSize != 0) + if (splitFunctionSize < maxFragmentSize) + maxFragmentSize = splitFunctionSize; +#endif // DEBUG + + // Now, there should be exactly one fragment. + + assert(uwiFragmentLast != NULL); + assert(uwiFragmentLast == &uwiFragmentFirst); + assert(uwiFragmentLast->ufiNext == NULL); + + // Find the code size of this function/funclet. + + UNATIVE_OFFSET startOffset; + UNATIVE_OFFSET endOffset; + UNATIVE_OFFSET codeSize; + + if (uwiFragmentLast->ufiEmitLoc == NULL) + { + // NULL emit location means the beginning of the code. This is to handle the first fragment prolog. + startOffset = 0; + } + else + { + startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->GetEmitter()); + } + + if (uwiEndLoc == NULL) + { + // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions + // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks. + // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late + // for us, since we need to decide how many fragments we need before the code memory is allocated + // (which is before instruction issuing). 
+ UNATIVE_OFFSET estimatedTotalCodeSize = + uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize; + assert(estimatedTotalCodeSize != 0); + endOffset = estimatedTotalCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + assert(endOffset > startOffset); // there better be at least 1 byte of code + codeSize = endOffset - startOffset; + + // Now that we know the code size for this section (main function hot or cold, or funclet), + // figure out how many fragments we're going to need. + + UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up + assert(numberOfFragments > 0); + + if (numberOfFragments == 1) + { + // No need to split; we're done + return; + } + + // Now, we're going to commit to splitting the function into "numberOfFragments" fragments, + // for the purpose of unwind information. We need to do the actual splits so we can figure out + // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know + // the actual offsets of the splits since we haven't issued the instructions yet, so store + // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase, + // like we do for the function length and epilog offsets. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (uwiComp->verbose) + { + printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n", + numberOfFragments, codeSize, maxFragmentSize); + } +#endif // DEBUG + + // Call the emitter to do the split, and call us back for every split point it chooses. + uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, + EmitSplitCallback); + +#ifdef DEBUG + // Did the emitter split the function/funclet into as many fragments as we asked for? 
+ // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not + // be fewer if we're splitting into 512K blocks! + + unsigned fragCount = 0; + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++fragCount; + } + if (fragCount < numberOfFragments) + { + if (uwiComp->verbose) + { + printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount); + } + + // If this fires, then we split into fewer fragments than we asked for, and we are using + // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit + // this fragment into the unwind data! If you set COMPlus_JitSplitFunctionSize to something + // small, we might not be able to split into as many fragments as asked for, because we + // can't split prologs or epilogs. + assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES); + } +#endif // DEBUG +} + +/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc) +{ + UnwindInfo* puwi = (UnwindInfo*)context; + puwi->AddFragment(emitLoc); +} + +// Reserve space for the unwind info for all fragments + +void UnwindInfo::Reserve(bool isFunclet, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(isHotCode || !isFunclet); + + for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Reserve(isFunclet, isHotCode); + } +} + +// Allocate and populate VM unwind info for all fragments + +void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + + UnwindFragmentInfo* pFrag; + + // First, finalize all the offsets (the location of the beginning of fragments, and epilogs), + // so a fragment can use the finalized offset of the subsequent fragment to determine its code size. 
+ + UNATIVE_OFFSET endOffset; + + if (uwiEndLoc == NULL) + { + assert(uwiComp->info.compNativeCodeSize != 0); + endOffset = uwiComp->info.compNativeCodeSize; + } + else + { + endOffset = uwiEndLoc->CodeOffset(uwiComp->GetEmitter()); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->FinalizeOffset(); + } + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode); + } +} + +void UnwindInfo::AddEpilog() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + uwiFragmentLast->AddEpilog(); + CaptureLocation(); +} + +void UnwindInfo::CaptureLocation() +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiCurLoc != NULL); + uwiCurLoc->CaptureLocation(uwiComp->GetEmitter()); +} + +void UnwindInfo::AddFragment(emitLocation* emitLoc) +{ + assert(uwiInitialized == UWI_INITIALIZED_PATTERN); + assert(uwiFragmentLast != NULL); + + UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true); + +#ifdef DEBUG + newFrag->ufiNum = uwiFragmentLast->ufiNum + 1; +#endif // DEBUG + + newFrag->CopyPrologCodes(&uwiFragmentFirst); + newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast); + + // Link the new fragment in at the end of the fragment list + uwiFragmentLast->ufiNext = newFrag; + uwiFragmentLast = newFrag; +} + +#ifdef DEBUG + +void UnwindInfo::Dump(bool isHotCode, int indent) +{ + unsigned count; + UnwindFragmentInfo* pFrag; + + count = 0; + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + ++count; + } + + printf("%*sUnwindInfo %s@0x%08p, size:%d:\n", indent, "", isHotCode ? "" : "COLD ", dspPtr(this), sizeof(*this)); + printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp)); + printf("%*s %d fragment%s\n", indent, "", count, (count != 1) ? 
"s" : ""); + printf("%*s uwiFragmentLast: 0x%08p\n", indent, "", dspPtr(uwiFragmentLast)); + printf("%*s uwiEndLoc: 0x%08p\n", indent, "", dspPtr(uwiEndLoc)); + printf("%*s uwiInitialized: 0x%08x\n", indent, "", uwiInitialized); + + for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext) + { + pFrag->Dump(indent + 2); + } +} + +#endif // DEBUG + +#endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index d6f9df4ce6b1e..5530998e627a6 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -132,6 +132,8 @@ const char* getRegName(regNumber reg) static const char* const regNames[] = { #if defined(TARGET_ARM64) #define REGDEF(name, rnum, mask, xname, wname) xname, +#elif defined(TARGET_LOONGARCH64) +#define REGDEF(name, rnum, mask, xname, wname) xname, #else #define REGDEF(name, rnum, mask, sname) sname, #endif @@ -217,7 +219,7 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regName; } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, xname, wname) xname, @@ -316,6 +318,14 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) } #elif defined(TARGET_X86) // No register ranges + +#elif defined(TARGET_LOONGARCH64) + if (REG_A0 <= regNum && regNum <= REG_X0) + { + regHead = regNum; + inRegRange = true; + sep = "-"; + } #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -325,10 +335,12 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) // We've already printed a register. Is this the end of a range? else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP -#else // TARGET_ARM64 +#elif defined(TARGET_LOONGARCH64) + else if ((regNum == REG_INT_LAST) || (regNum == REG_X0)) +#else // TARGET_LOONGARCH64 // We've already printed a register. 
Is this the end of a range? else if (regNum == REG_INT_LAST) -#endif // TARGET_ARM64 +#endif // TARGET_LOONGARCH64 { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index af8d375219917..209149c1291f6 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -57,6 +57,8 @@ struct FloatTraits unsigned bits = 0xFFC00000u; #elif defined(TARGET_ARMARCH) unsigned bits = 0x7FC00000u; +#elif defined(TARGET_LOONGARCH64) + unsigned bits = 0xFFC00000u; #else #error Unsupported or unset target architecture #endif @@ -83,6 +85,8 @@ struct DoubleTraits unsigned long long bits = 0xFFF8000000000000ull; #elif defined(TARGET_ARMARCH) unsigned long long bits = 0x7FF8000000000000ull; +#elif defined(TARGET_LOONGARCH64) + unsigned long long bits = 0xFFF8000000000000ull; #else #error Unsupported or unset target architecture #endif @@ -106,7 +110,7 @@ struct DoubleTraits template TFp FpAdd(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is negative infinity and [value2] is positive infinity // the result is NaN. // If [value1] is positive infinity and [value2] is negative infinity @@ -124,7 +128,7 @@ TFp FpAdd(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 + value2; } @@ -142,7 +146,7 @@ TFp FpAdd(TFp value1, TFp value2) template TFp FpSub(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If [value1] is positive infinity and [value2] is positive infinity // the result is NaN. 
// If [value1] is negative infinity and [value2] is negative infinity @@ -160,7 +164,7 @@ TFp FpSub(TFp value1, TFp value2) return TFpTraits::NaN(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 - value2; } @@ -178,7 +182,7 @@ TFp FpSub(TFp value1, TFp value2) template TFp FpMul(TFp value1, TFp value2) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [value1] is zero and [value2] is infinity @@ -194,7 +198,7 @@ TFp FpMul(TFp value1, TFp value2) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return value1 * value2; } @@ -212,7 +216,7 @@ TFp FpMul(TFp value1, TFp value2) template TFp FpDiv(TFp dividend, TFp divisor) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // From the ECMA standard: // // If [dividend] is zero and [divisor] is zero @@ -228,7 +232,7 @@ TFp FpDiv(TFp dividend, TFp divisor) { return TFpTraits::NaN(); } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 return dividend / divisor; } @@ -2776,7 +2780,11 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu } else { +#ifdef TARGET_LOONGARCH64 + assert(typ == TYP_INT || typ == TYP_LONG); +#else assert(typ == TYP_INT); +#endif int resultVal = EvalOp(func, arg0Val, arg1Val); // Bin op on a handle results in a handle. ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? 
arg1VN : NoVN; @@ -9877,7 +9885,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet())); } -#if defined(FEATURE_READYTORUN) && defined(TARGET_ARMARCH) +#if defined(FEATURE_READYTORUN) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (call->IsR2RRelativeIndir()) { #ifdef DEBUG @@ -9892,7 +9900,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN // in morph. So we do not need to use EntryPointAddrAsArg0, because arg0 is already an entry point addr. useEntryPointAddrAsArg0 = false; } -#endif // FEATURE_READYTORUN && TARGET_ARMARCH +#endif // FEATURE_READYTORUN && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (nArgs == 0) { diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index d5a81d03ca502..8a0aa8fc9bc4e 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -177,6 +177,10 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t #elif defined (TARGET_ARM) // No Hardware Intrinsics on ARM32 + +#elif defined (TARGET_LOONGARCH64) + //TODO: add LoongArch64's Hardware Instructions. + #else #error Unsupported platform #endif From 98a8b9db19142395c419dc47eb4651a6a86a45f1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 18 Dec 2021 15:05:26 +0800 Subject: [PATCH 02/46] [LoongArch64] add jit/CMakeLists.txt from #62889. 
--- src/coreclr/jit/CMakeLists.txt | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index c46cdd18a164e..3c6febd86ad53 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -41,6 +41,9 @@ function(create_standalone_jit) elseif(TARGETDETAILS_ARCH STREQUAL "s390x") set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) + elseif(TARGETDETAILS_ARCH STREQUAL "loongarch64") + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -224,6 +227,17 @@ set( JIT_S390X_SOURCES # Not supported as JIT target ) +set( JIT_LOONGARCH64_SOURCES + codegenloongarch64.cpp + emitloongarch64.cpp + lowerloongarch64.cpp + lsraloongarch64.cpp + targetloongarch64.cpp + unwindloongarch64.cpp + ##hwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. + ##simdashwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. +) + # We include the headers here for better experience in IDEs. set( JIT_HEADERS ../inc/corinfo.h @@ -366,6 +380,15 @@ set ( JIT_S390X_HEADERS # Not supported as JIT target ) +set( JIT_LOONGARCH64_HEADERS + emitloongarch64.h + emitfmtsloongarch64.h + instrsloongarch64.h + registerloongarch64.h +#hwintrinsiclistloongarch64.h ###TODO:Not implemented on loongarch64 yet. 
+#simdashwintrinsiclistloongarch64.h +) + convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) convert_to_absolute_path(JIT_HEADERS ${JIT_HEADERS}) convert_to_absolute_path(JIT_RESOURCES ${JIT_RESOURCES}) @@ -382,6 +405,8 @@ convert_to_absolute_path(JIT_ARM64_SOURCES ${JIT_ARM64_SOURCES}) convert_to_absolute_path(JIT_ARM64_HEADERS ${JIT_ARM64_HEADERS}) convert_to_absolute_path(JIT_S390X_SOURCES ${JIT_S390X_SOURCES}) convert_to_absolute_path(JIT_S390X_HEADERS ${JIT_S390X_HEADERS}) +convert_to_absolute_path(JIT_LOONGARCH64_SOURCES ${JIT_LOONGARCH64_SOURCES}) +convert_to_absolute_path(JIT_LOONGARCH64_HEADERS ${JIT_LOONGARCH64_HEADERS}) if(CLR_CMAKE_TARGET_ARCH_AMD64) set(JIT_ARCH_SOURCES ${JIT_AMD64_SOURCES}) @@ -398,6 +423,9 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) elseif(CLR_CMAKE_TARGET_ARCH_S390X) set(JIT_ARCH_SOURCES ${JIT_S390X_SOURCES}) set(JIT_ARCH_HEADERS ${JIT_S390X_HEADERS}) +elseif(CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + set(JIT_ARCH_SOURCES ${JIT_LOONGARCH64_SOURCES}) + set(JIT_ARCH_HEADERS ${JIT_LOONGARCH64_HEADERS}) else() clr_unknown_arch() endif() @@ -535,11 +563,12 @@ install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) + create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) 
+endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) From 0b561c4915a5b4787777878a7503be1ff09e3e0f Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 7 Jan 2022 13:02:13 +0800 Subject: [PATCH 03/46] [LoongArch64] update LoongArch64 after merge from main. --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegencommon.cpp | 15 ++ src/coreclr/jit/codegenloongarch64.cpp | 332 ++----------------------- src/coreclr/jit/emitloongarch64.cpp | 15 +- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/jit/lower.cpp | 8 +- src/coreclr/jit/lowerloongarch64.cpp | 15 +- src/coreclr/jit/lsraloongarch64.cpp | 8 +- 8 files changed, 58 insertions(+), 339 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 52161b44ea34e..11e6eb7edfe0c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -614,7 +614,7 @@ class CodeGen final : public CodeGenInterface #endif #if defined(DEBUG) && defined(TARGET_LOONGARCH64) - void genLOONGARCH64EmitterUnitTests(); + void genLoongArch64EmitterUnitTests(); #endif #if defined(DEBUG) && defined(LATE_DISASM) && defined(TARGET_AMD64) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index daddd89ab35b2..832471bd44798 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -12205,6 +12205,20 @@ void CodeGen::genStructReturn(GenTree* treeNode) GenTreeLclVar* lclNode = actualOp1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); +#ifdef TARGET_LOONGARCH64 + var_types type = retTypeDesc.GetReturnRegType(0); + regNumber toReg = retTypeDesc.GetABIReturnReg(0); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, 
lclNode->GetLclNum(), 0); + if (regCount > 1) + { + assert(regCount == 2); + int offset = genTypeSize(type); + type = retTypeDesc.GetReturnRegType(1); + offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; + toReg = retTypeDesc.GetABIReturnReg(1); + GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); + } +#else int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -12213,6 +12227,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } +#endif } else { diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 49cc67b50dc36..c5f407f3be311 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -3161,9 +3161,10 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) case GT_MUL: if ((attr == EA_8BYTE) || (attr == EA_BYREF)) { - //if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) - // ins = INS_mul_d; - //else + op2 = treeNode->gtGetOp2(); + if (genActualTypeIsInt(op1) && genActualTypeIsInt(op2)) + ins = treeNode->IsUnsigned() ? 
INS_mulw_d_wu : INS_mulw_d_w; + else ins = INS_mul_d; } else @@ -3442,8 +3443,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) dataReg = data->GetRegNum(); } - var_types type = tree->TypeGet(); - instruction ins = ins_Store(type); + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { @@ -4361,7 +4362,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) switch (cmpSize) { case EA_4BYTE: - if (op1->gtFlags & GTF_UNSIGNED) + if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) imm = static_cast(imm); else imm = static_cast(imm); @@ -4551,7 +4552,15 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) if (op2->AsIntCon()->gtIconVal) { assert(reg != REG_R21); - GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, op2->AsIntCon()->gtIconVal); + ssize_t imm = op2->AsIntCon()->gtIconVal; + if ((tree->gtFlags & GTF_UNSIGNED) && (attr == EA_4BYTE)) + { + assert(reg != REG_RA); + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; + } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); regs = (int)reg << 5; regs |= (int)REG_R21;//REG_R21 ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; @@ -5017,297 +5026,6 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) #endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS -#include "hwintrinsic.h" - -instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType) -{ - assert(!"unimplemented on LOONGARCH yet"); - return INS_invalid; -} - -//------------------------------------------------------------------------ -// genHWIntrinsic: Produce code for a GT_HWINTRINSIC node. -// -// This is the main routine which in turn calls the genHWIntrinsicXXX() routines. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. 
-// -void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicUnaryOp: -// -// Produce code for a GT_HWINTRINSIC node with form UnaryOp. -// -// Consumes one scalar operand produces a scalar -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicCrcOp: -// -// Produce code for a GT_HWINTRINSIC node with form CrcOp. -// -// Consumes two scalar operands and produces a scalar result -// -// This form differs from BinaryOp because the attr depends on the size of op2 -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdBinaryOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdBinaryOp. -// -// Consumes two SIMD operands and produces a SIMD result -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSwitchTable: generate the jump-table for imm-intrinsics -// with non-constant argument -// -// Arguments: -// swReg - register containing the switch case to execute -// tmpReg - temporary integer register for calculating the switch indirect branch target -// swMax - the number of switch cases. 
-// emitSwCase - lambda to generate an individual switch case -// -// Notes: -// Used for cases where an instruction only supports immediate operands, -// but at jit time the operand is not a constant. -// -// The importer is responsible for inserting an upstream range check -// (GT_HW_INTRINSIC_CHK) for swReg, so no range check is needed here. -// -template -void CodeGen::genHWIntrinsicSwitchTable(regNumber swReg, - regNumber tmpReg, - int swMax, - HWIntrinsicSwitchCaseBody emitSwCase) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdExtractOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdExtractOp. -// -// Consumes one SIMD operand and one scalar -// -// The element index operand is typically a const immediate -// When it is not, a switch table is generated -// -// See genHWIntrinsicSwitchTable comments -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdInsertOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdInsertOp. -// -// Consumes one SIMD operand and two scalars -// -// The element index operand is typically a const immediate -// When it is not, a switch table is generated -// -// See genHWIntrinsicSwitchTable comments -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdSelectOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdSelectOp. 
-// -// Consumes three SIMD operands and produces a SIMD result -// -// This intrinsic form requires one of the source registers to be the -// destination register. Inserts a INS_mov if this requirement is not met. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdSetAllOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdSetAllOp. -// -// Consumes single scalar operand and produces a SIMD result -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdSetAllOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdUnaryOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdUnaryOp. -// -// Consumes single SIMD operand and produces a SIMD result -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdBinaryRMWOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdBinaryRMWOp. -// -// Consumes two SIMD operands and produces a SIMD result. -// First operand is both source and destination. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. 
-// -void CodeGen::genHWIntrinsicSimdBinaryRMWOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicSimdTernaryRMWOp: -// -// Produce code for a GT_HWINTRINSIC node with form SimdTernaryRMWOp -// -// Consumes three SIMD operands and produces a SIMD result. -// First operand is both source and destination. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicSimdTernaryRMWOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicShaHashOp: -// -// Produce code for a GT_HWINTRINSIC node with form Sha1HashOp. -// Used in LOONGARCH64 SHA1 Hash operations. -// -// Consumes three operands and returns a Simd result. -// First Simd operand is both source and destination. -// Second Operand is an unsigned int. -// Third operand is a simd operand. - -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. -// -void CodeGen::genHWIntrinsicShaHashOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -//------------------------------------------------------------------------ -// genHWIntrinsicShaRotateOp: -// -// Produce code for a GT_HWINTRINSIC node with form Sha1RotateOp. -// Used in LOONGARCH64 SHA1 Rotate operations. -// -// Consumes one integer operand and returns unsigned int result. -// -// Arguments: -// node - the GT_HWINTRINSIC node -// -// Return Value: -// None. 
-// -void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -#endif // FEATURE_HW_INTRINSICS - /***************************************************************************** * Unit testing of the LOONGARCH64 emitter: generate a bunch of instructions into the prolog * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late @@ -5319,7 +5037,7 @@ void CodeGen::genHWIntrinsicShaRotateOp(GenTreeHWIntrinsic* node) //#define ALL_LOONGARCH64_EMITTER_UNIT_TESTS #if defined(DEBUG) -void CodeGen::genLOONGARCH64EmitterUnitTests() +void CodeGen::genLoongArch64EmitterUnitTests() { if (!verbose) { @@ -5333,9 +5051,9 @@ void CodeGen::genLOONGARCH64EmitterUnitTests() } // Mark the "fake" instructions in the output. - printf("*************** In genLOONGARCH64EmitterUnitTests()\n"); + printf("*************** In genLoongArch64EmitterUnitTests()\n"); - printf("*************** End of genLOONGARCH64EmitterUnitTests()\n"); + printf("*************** End of genLoongArch64EmitterUnitTests()\n"); } #endif // defined(DEBUG) @@ -5758,13 +5476,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) instGen(INS_nop); break; - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif // FEATURE_HW_INTRINSICS + case GT_BOUNDS_CHECK: genRangeCheck(treeNode); break; @@ -6571,11 +6283,11 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) } //------------------------------------------------------------------------ -// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node. +// genRangeCheck: generate code for GT_BOUNDS_CHECK node. 
// void CodeGen::genRangeCheck(GenTree* oper) { - noway_assert(oper->OperIsBoundsCheck()); + noway_assert(oper->OperIs(GT_BOUNDS_CHECK)); GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); GenTree* arrLen = bndsChk->GetArrayLength(); diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index a5492bee3394b..f7c06ad5cc7f6 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -1135,7 +1135,8 @@ void emitter::emitIns_Mov( {//TODO: should amend for LoongArch64/LOONGARCH64. assert(IsMovInstruction(ins)); - emitIns_R_R(ins, attr, dstReg, srcReg); + if (!canSkip || (dstReg != srcReg)) + emitIns_R_R(ins, attr, dstReg, srcReg); } /***************************************************************************** @@ -1605,8 +1606,8 @@ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, re // assert(!EA_IS_RELOC(attr)); emitIns_I_la(attr, reg1, imm); - //codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); - emitIns_R_R_R(ins, attr, reg1, reg2, reg1); + assert(ins == INS_ld_d); + emitIns_R_R_R(INS_ldx_d, attr, reg1, reg2, reg1); } } @@ -6317,10 +6318,12 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } else { + assert(REG_RA != dst->GetRegNum()); + assert(REG_RA != src1->GetRegNum()); + assert(REG_RA != src2->GetRegNum()); size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; - emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai_d : INS_srai_w, attr, REG_T0, dst->GetRegNum(), imm); - //TODO: FIXME:should confirm reg REG_T0! - codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_T0); + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? 
INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), imm); + codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_RA); } } } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 23e0761a8515f..2dff5054a8cc8 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -645,7 +645,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #if defined(TARGET_LOONGARCH64) int flags = 0; - CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd, &flags); + CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); #else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); #endif diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0e0c6394d9d88..86925234b877b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3649,10 +3649,16 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) assert(!call->HasMultiRegRetVal()); const ClassLayout* layout = store->GetLayout(); - const var_types regType = layout->GetRegisterType(); + var_types regType = layout->GetRegisterType(); if (regType != TYP_UNDEF) { +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(call->TypeGet())) + regType = call->TypeGet(); + assert(regType != TYP_UNDEF); + assert(regType != TYP_STRUCT); +#endif store->ChangeType(regType); store->SetOper(GT_STOREIND); LowerStoreIndirCommon(store->AsStoreInd()); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 0b77c8a27d3b8..3f5df4552c731 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -87,12 +87,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_LE: case GT_GE: case GT_GT: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: 
-#endif -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif + case GT_BOUNDS_CHECK: return ((-32768 <= immVal) && (immVal <= 32767)); case GT_AND: case GT_OR: @@ -1335,10 +1330,6 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) // void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) { -#if 0 -assert(!"unimplemented on LOONGARCH yet"); -#else - GenTree* src = node->Data(); if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0)) { @@ -1347,8 +1338,6 @@ assert(!"unimplemented on LOONGARCH yet"); } ContainCheckIndir(node); - -#endif } //------------------------------------------------------------------------ @@ -1547,7 +1536,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) { - assert(node->OperIsBoundsCheck()); + assert(node->OperIs(GT_BOUNDS_CHECK)); if (!CheckImmedAndMakeContained(node, node->GetIndex())) { CheckImmedAndMakeContained(node, node->GetArrayLength()); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 8ce30f7bb6855..ba2f6f3536f24 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -621,13 +621,7 @@ int LinearScan::BuildNode(GenTree* tree) } break; - case GT_ARR_BOUNDS_CHECK: -#ifdef FEATURE_SIMD - case GT_SIMD_CHK: -#endif // FEATURE_SIMD -#ifdef FEATURE_HW_INTRINSICS - case GT_HW_INTRINSIC_CHK: -#endif // FEATURE_HW_INTRINSICS + case GT_BOUNDS_CHECK: { GenTreeBoundsChk* node = tree->AsBoundsChk(); // Consumes arrLen & index - has no result From a5424e895fe95f9abe619ed6df17f95f08bd9d54 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 12:16:04 +0800 Subject: [PATCH 04/46] [LoongArch64] Fix the error for "IsLoongArch64". 
--- src/coreclr/inc/targetosarch.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/coreclr/inc/targetosarch.h b/src/coreclr/inc/targetosarch.h index b2d1c06a22d66..9025a8608af0f 100644 --- a/src/coreclr/inc/targetosarch.h +++ b/src/coreclr/inc/targetosarch.h @@ -41,27 +41,38 @@ class TargetArchitecture static const bool IsArm64 = false; static const bool IsArm32 = true; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_ARM64) static const bool IsX86 = false; static const bool IsX64 = false; static const bool IsArm64 = true; static const bool IsArm32 = false; static const bool IsArmArch = true; + static const bool IsLoongArch64 = false; #elif defined(TARGET_AMD64) static const bool IsX86 = false; static const bool IsX64 = true; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; #elif defined(TARGET_X86) static const bool IsX86 = true; static const bool IsX64 = false; static const bool IsArm64 = false; static const bool IsArm32 = false; static const bool IsArmArch = false; + static const bool IsLoongArch64 = false; +#elif defined(TARGET_LOONGARCH64) + static const bool IsX86 = false; + static const bool IsX64 = false; + static const bool IsArm64 = false; + static const bool IsArm32 = false; + static const bool IsArmArch = false; + static const bool IsLoongArch64 = true; #else #error Unknown architecture #endif }; -#endif // targetosarch_h \ No newline at end of file +#endif // targetosarch_h From 3be6cbcdc49d8a860f7fcde3067f1bb9151e4379 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 12:44:23 +0800 Subject: [PATCH 05/46] [LoongArch64] Fix the cross-compiling error. 
--- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 7d256c481a70c..2c874beb70868 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17731,7 +17731,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || #ifdef TARGET_LOONGARCH64 - genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType) || + (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || #endif ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || From 8ded978c65fc7f51ef651dfb34336a735900798c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 14:48:44 +0800 Subject: [PATCH 06/46] [LoongArch64] Fixed the compiling errors after merge. --- src/coreclr/jit/CMakeLists.txt | 8 +++++--- src/coreclr/jit/ICorJitInfo_API_wrapper.hpp | 18 +----------------- src/coreclr/jit/jit.h | 2 +- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 3c6febd86ad53..ed86960817e7c 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -563,12 +563,14 @@ install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +#if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) 
create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) - create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + #create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +#endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) diff --git a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp index 8326f17cfc7ee..1e2627ccb3c9a 100644 --- a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp +++ b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp @@ -986,30 +986,14 @@ CORINFO_ARG_LIST_HANDLE WrapICorJitInfo::getArgNext( CorInfoTypeWithMod WrapICorJitInfo::getArgType( CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args, - CORINFO_CLASS_HANDLE* vcTypeRet -#if defined(TARGET_LOONGARCH64) - ,int *flags = NULL -#endif -) + CORINFO_CLASS_HANDLE* vcTypeRet) { API_ENTER(getArgType); -#if defined(TARGET_LOONGARCH64) - CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet, flags); -#else CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet); -#endif API_LEAVE(getArgType); return temp; } -uint32_t WrapICorJitInfo::getFieldTypeByHnd(CORINFO_CLASS_HANDLE cls) -{ - API_ENTER(getFieldTypeByHnd); - DWORD temp = wrapHnd->getFieldTypeByHnd(cls); - API_LEAVE(getFieldTypeByHnd); - return temp; -} - CORINFO_CLASS_HANDLE WrapICorJitInfo::getArgClass( CORINFO_SIG_INFO* 
sig, CORINFO_ARG_LIST_HANDLE args) diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index a6a6977be0225..987ea401c503a 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -214,7 +214,7 @@ #elif defined(TARGET_ARM64) #define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64 #elif defined(TARGET_LOONGARCH64) -#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0xDD64 +#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_LOONGARCH64 // 0x6264 #else #error Unsupported or unset target architecture #endif From eba508b1274272f58b24ecefd470212b878f5e40 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 17:48:14 +0800 Subject: [PATCH 07/46] [LoongArch64] revert `src/coreclr/jit/ICorJitInfo_API_names.h`. --- src/coreclr/jit/ICorJitInfo_API_names.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/ICorJitInfo_API_names.h b/src/coreclr/jit/ICorJitInfo_API_names.h index f9597085d11df..d373091453220 100644 --- a/src/coreclr/jit/ICorJitInfo_API_names.h +++ b/src/coreclr/jit/ICorJitInfo_API_names.h @@ -122,7 +122,6 @@ DEF_CLR_API(getMethodNameFromMetadata) DEF_CLR_API(getMethodHash) DEF_CLR_API(findNameOfToken) DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) -DEF_CLR_API(getFieldTypeByHnd) DEF_CLR_API(getThreadTLSIndex) DEF_CLR_API(getInlinedCallFrameVptr) DEF_CLR_API(getAddrOfCaptureThreadGlobal) From e1b998665a3c2d6298f485764af41328fd8d2acc Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 10 Jan 2022 19:18:56 +0800 Subject: [PATCH 08/46] [LoongArch64] workround the compiling error on windows. 
--- src/coreclr/jit/importer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 2c874beb70868..f06e2be8c5a7d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17729,14 +17729,20 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) op2 = impImplicitIorI4Cast(op2, info.compRetType); op2 = impImplicitR4orR8Cast(op2, info.compRetType); // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. - assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || #ifdef TARGET_LOONGARCH64 + assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || -#endif ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); +#else + assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || + ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || + (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || + (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || + (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); +#endif #ifdef DEBUG if (!isTailCall && opts.compGcChecks && (info.compRetType == TYP_REF)) From d5133483a426bd101022f0ef41fe0d85ef58bc29 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 11 Jan 2022 15:31:59 +0800 Subject: [PATCH 09/46] [LoongArch64] amend the code-format. 
--- src/coreclr/jit/codegen.h | 12 +- src/coreclr/jit/codegencommon.cpp | 231 +- src/coreclr/jit/codegenlinear.cpp | 6 +- src/coreclr/jit/codegenloongarch64.cpp | 1358 ++++--- src/coreclr/jit/compiler.cpp | 10 +- src/coreclr/jit/compiler.h | 21 +- src/coreclr/jit/compiler.hpp | 4 +- src/coreclr/jit/ee_il_dll.cpp | 2 +- src/coreclr/jit/emit.cpp | 103 +- src/coreclr/jit/emit.h | 44 +- src/coreclr/jit/emitloongarch64.cpp | 3795 ++++++++++--------- src/coreclr/jit/emitloongarch64.h | 84 +- src/coreclr/jit/gentree.cpp | 22 +- src/coreclr/jit/importer.cpp | 118 +- src/coreclr/jit/instr.cpp | 18 +- src/coreclr/jit/instrsloongarch64.h | 1 - src/coreclr/jit/jit.h | 2 +- src/coreclr/jit/lclvars.cpp | 70 +- src/coreclr/jit/lower.cpp | 2 +- src/coreclr/jit/lowerloongarch64.cpp | 39 +- src/coreclr/jit/lsra.cpp | 13 +- src/coreclr/jit/lsra.h | 14 +- src/coreclr/jit/lsraloongarch64.cpp | 20 +- src/coreclr/jit/morph.cpp | 127 +- src/coreclr/jit/register_arg_convention.cpp | 6 +- src/coreclr/jit/regset.h | 2 +- src/coreclr/jit/scopeinfo.cpp | 2 +- src/coreclr/jit/target.h | 3 +- src/coreclr/jit/targetloongarch64.cpp | 4 +- src/coreclr/jit/unwind.cpp | 3 +- src/coreclr/jit/unwindloongarch64.cpp | 59 +- src/coreclr/jit/utils.cpp | 4 +- src/coreclr/jit/valuenum.cpp | 2 +- 33 files changed, 3148 insertions(+), 3053 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 11e6eb7edfe0c..b44ed34f09857 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -239,7 +239,11 @@ class CodeGen final : public CodeGenInterface #ifdef TARGET_LOONGARCH64 void genSetRegToIcon(regNumber reg, ssize_t val, var_types type); - void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk = nullptr, regNumber reg2 = REG_R0); + void genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, + instruction ins, + regNumber reg1, + BasicBlock* failBlk = nullptr, + regNumber reg2 = REG_R0); #else void genCheckOverflow(GenTree* 
tree); #endif @@ -1335,7 +1339,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef TARGET_LOONGARCH64 - //TODO for LOONGARCH64 : maybe delete on LA64? + // TODO for LOONGARCH64 : maybe delete on LA64? void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); #endif @@ -1363,7 +1367,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX BasicBlock* genCallFinally(BasicBlock* block); void genCodeForJumpTrue(GenTreeOp* jtrue); #if defined(TARGET_LOONGARCH64) - //TODO: refactor for LA. + // TODO: refactor for LA. void genCodeForJumpCompare(GenTreeOp* tree); #endif #if defined(TARGET_ARM64) @@ -1401,7 +1405,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genFloatReturn(GenTree* treeNode); #endif // TARGET_X86 -#if defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) void genSimpleReturn(GenTree* treeNode); #endif // TARGET_ARM64 || TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 832471bd44798..b4966106a9dc8 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1785,19 +1785,24 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) else { //// Ngen case - GS cookie constant needs to be accessed through an indirection. 
- //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); if (compiler->opts.compReloc) { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); } else - {////TODO:LoongArch64 should amend for optimize! - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + { ////TODO:LoongArch64 should amend for optimize! 
+ // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); } regSet.verifyRegUsed(regGSConst); } @@ -1809,7 +1814,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); // regGSConst and regGSValue aren't needed anymore, we can use them for helper call - genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);//no branch-delay! + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); genDefineTempLabel(gsCheckBlk); } #endif // TARGET_LOONGARCH64 @@ -1921,8 +1926,8 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi } else { - // The code to throw the exception will be generated inline, and - // we will jump around it in the normal non-exception case. +// The code to throw the exception will be generated inline, and +// we will jump around it in the normal non-exception case. 
#ifndef TARGET_LOONGARCH64 BasicBlock* tgtBlk = nullptr; @@ -3324,8 +3329,8 @@ void CodeGen::genFnPrologCalleeRegArgs() noway_assert(regArgMaskLive != 0); unsigned varNum; - unsigned regArgsVars[MAX_REG_ARG*2] = {0}; - unsigned regArgNum = 0; + unsigned regArgsVars[MAX_REG_ARG * 2] = {0}; + unsigned regArgNum = 0; for (varNum = 0; varNum < compiler->lvaCount; ++varNum) { LclVarDsc* varDsc = compiler->lvaTable + varNum; @@ -3349,7 +3354,8 @@ void CodeGen::genFnPrologCalleeRegArgs() { if (varDsc->GetArgInitReg() > REG_ARG_LAST) { - inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? TYP_DOUBLE : TYP_LONG, varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); + inst_Mov(genIsValidFloatReg(varDsc->GetArgInitReg()) ? TYP_DOUBLE : TYP_LONG, + varDsc->GetArgInitReg(), varDsc->GetArgReg(), false); regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); } else @@ -3386,7 +3392,7 @@ void CodeGen::genFnPrologCalleeRegArgs() { storeType = varDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; } - else //if (emitter::isGeneralRegister(varDsc->GetArgReg())) + else // if (emitter::isGeneralRegister(varDsc->GetArgReg())) { assert(emitter::isGeneralRegister(varDsc->GetArgReg())); if (varDsc->lvIs4Field1) @@ -3394,7 +3400,7 @@ void CodeGen::genFnPrologCalleeRegArgs() else storeType = varDsc->GetLayout()->GetGCPtrType(0); } - slotSize = (unsigned)emitActualTypeSize(storeType); + slotSize = (unsigned)emitActualTypeSize(storeType); #if FEATURE_MULTIREG_ARGS // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers @@ -3426,12 +3432,12 @@ void CodeGen::genFnPrologCalleeRegArgs() { assert(srcRegNum != varDsc->GetOtherArgReg()); - int tmp_offset = 0; - regNumber tmp_reg = REG_NA; + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; bool FPbased; - int baseOffset = 0;//(regArgTab[argNum].slot - 1) * slotSize; - int base = compiler->lvaFrameAddress(varNum, &FPbased); + int baseOffset = 0; //(regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, 
&FPbased); base += baseOffset; @@ -3444,9 +3450,9 @@ void CodeGen::genFnPrologCalleeRegArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); @@ -3466,10 +3472,10 @@ void CodeGen::genFnPrologCalleeRegArgs() if (emitter::isFloatReg(varDsc->GetOtherArgReg())) { baseOffset = (int)EA_SIZE(emitActualTypeSize(storeType)); - storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; - size = EA_SIZE(emitActualTypeSize(storeType)); + storeType = varDsc->lvIs4Field2 ? TYP_FLOAT : TYP_DOUBLE; + size = EA_SIZE(emitActualTypeSize(storeType)); baseOffset = baseOffset < (int)size ? (int)size : baseOffset; - srcRegNum = varDsc->GetOtherArgReg(); + srcRegNum = varDsc->GetOtherArgReg(); } else if (emitter::isGeneralRegister(varDsc->GetOtherArgReg())) { @@ -3478,10 +3484,10 @@ void CodeGen::genFnPrologCalleeRegArgs() storeType = TYP_INT; else storeType = varDsc->GetLayout()->GetGCPtrType(1); - size = emitActualTypeSize(storeType); + size = emitActualTypeSize(storeType); if (baseOffset < (int)EA_SIZE(size)) baseOffset = (int)EA_SIZE(size); - srcRegNum = varDsc->GetOtherArgReg(); + srcRegNum = varDsc->GetOtherArgReg(); } if (srcRegNum == varDsc->GetOtherArgReg()) @@ -3497,9 +3503,9 @@ void CodeGen::genFnPrologCalleeRegArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); @@ -3510,7 +3516,7 @@ void CodeGen::genFnPrologCalleeRegArgs() GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } } - regArgMaskLive &= ~genRegMask(srcRegNum);//maybe do this later is better! + regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! } else if (varDsc->lvIsSplit) { @@ -3518,7 +3524,8 @@ void CodeGen::genFnPrologCalleeRegArgs() baseOffset = 8; base += 8; - GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size/*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size /*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, + genTotalFrameSize()); if ((-2048 <= base) && (base < 2048)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); @@ -3528,9 +3535,9 @@ void CodeGen::genFnPrologCalleeRegArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, -8); @@ -3554,23 +3561,25 @@ void CodeGen::genFnPrologCalleeRegArgs() while (regArgNum > 0) { - varNum = regArgsVars[regArgNum - 1]; + varNum = regArgsVars[regArgNum - 1]; LclVarDsc* varDsc = compiler->lvaTable + varNum; if (varDsc->GetArgInitReg() > varDsc->GetArgReg()) { var_types destMemType = varDsc->TypeGet(); - GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), varDsc->GetArgReg()); + GetEmitter()->emitIns_R_R(ins_Copy(destMemType), emitActualTypeSize(destMemType), varDsc->GetArgInitReg(), + varDsc->GetArgReg()); regArgNum--; regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); } else { - for (int i=0; i < regArgNum; i++) + for (int i = 0; i < regArgNum; i++) { - LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; - var_types destMemType = varDsc2->GetRegisterType(); - inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, emitActualTypeSize(destMemType)); + LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; + var_types destMemType = varDsc2->GetRegisterType(); + inst_Mov(destMemType, varDsc2->GetArgInitReg(), varDsc2->GetArgReg(), /* canSkip */ false, + emitActualTypeSize(destMemType)); regArgMaskLive &= ~genRegMask(varDsc2->GetArgReg()); } break; @@ -3578,9 +3587,8 @@ void CodeGen::genFnPrologCalleeRegArgs() } assert(!regArgMaskLive); - } -#else //!defined(TARGET_LOONGARCH64) +#else //! 
defined(TARGET_LOONGARCH64) void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -3654,20 +3662,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // struct regArgElem { - unsigned varNum; // index into compiler->lvaTable[] for this register argument + unsigned varNum; // index into compiler->lvaTable[] for this register argument #if defined(UNIX_AMD64_ABI) - var_types type; // the Jit type of this regArgTab entry -#endif // defined(UNIX_AMD64_ABI) - unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. - // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to - // argument register number 'x'. Only used when circular = true. - char slot; // 0 means the register is not used for a register argument - // 1 means the first part of a register argument - // 2, 3 or 4 means the second,third or fourth part of a multireg argument - bool stackArg; // true if the argument gets homed to the stack - bool writeThru; // true if the argument gets homed to both stack and register - bool processed; // true after we've processed the argument (and it is in its final location) - bool circular; // true if this register participates in a circular dependency loop. + var_types type; // the Jit type of this regArgTab entry +#endif // defined(UNIX_AMD64_ABI) + unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. + // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to + // argument register number 'x'. Only used when circular = true. 
+ char slot; // 0 means the register is not used for a register argument + // 1 means the first part of a register argument + // 2, 3 or 4 means the second,third or fourth part of a multireg argument + bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register + bool processed; // true after we've processed the argument (and it is in its final location) + bool circular; // true if this register participates in a circular dependency loop. #ifdef UNIX_AMD64_ABI @@ -4470,10 +4478,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere else #endif // TARGET_XARCH { - var_types destMemType = varDscDest->TypeGet(); + var_types destMemType = varDscDest->TypeGet(); #ifdef TARGET_ARM - bool cycleAllDouble = true; // assume the best + bool cycleAllDouble = true; // assume the best unsigned iter = begReg; do @@ -4898,8 +4906,8 @@ void CodeGen::genEnregisterIncomingStackArgs() unsigned varNum = 0; #ifdef TARGET_LOONGARCH64 - int tmp_offset = 0; - regNumber tmp_reg = REG_NA; + int tmp_offset = 0; + regNumber tmp_reg = REG_NA; #endif for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) @@ -4951,8 +4959,8 @@ void CodeGen::genEnregisterIncomingStackArgs() #ifdef TARGET_LOONGARCH64 { bool FPbased; - //int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; - int base = compiler->lvaFrameAddress(varNum, &FPbased); + // int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; + int base = compiler->lvaFrameAddress(varNum, &FPbased); if ((-2048 <= base) && (base < 2048)) { @@ -4963,9 +4971,9 @@ void CodeGen::genEnregisterIncomingStackArgs() if (tmp_reg == REG_NA) { regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + tmp_offset = base; + tmp_reg = REG_R21; + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); @@ -5959,8 +5967,9 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { - JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); frameType = 1; @@ -5974,17 +5983,18 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) calleeSaveSPOffset = compiler->compLclFrameSize; - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, dspBool(compiler->compLocallocUsed)); - + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); } - //calleeSaveSPDelta = 0; + // calleeSaveSPDelta = 0; } else { if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { - JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? %s\n", + JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? 
%s\n", unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, dspBool(compiler->compLocallocUsed)); @@ -5993,13 +6003,13 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) int outSzAligned; if (compiler->lvaOutgoingArgSpaceSize >= 2040) { - int offset = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); calleeSaveSPOffset = calleeSaveSPDelta - offset; - int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); - offset2 = calleeSaveSPDelta - offset2; + offset2 = calleeSaveSPDelta - offset2; if (compiler->compLocallocUsed) { @@ -6009,8 +6019,8 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } else { - outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - //if (outSzAligned > 0) + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + // if (outSzAligned > 0) { genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); } @@ -6026,7 +6036,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2*REGSIZE_BYTES; + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); } else @@ -6047,11 +6057,12 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); compiler->unwindSaveReg(REG_FP, offset2); - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 
2*REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); } } else @@ -6063,7 +6074,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) dspBool(compiler->compLocallocUsed)); calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; if (compiler->compLocallocUsed) @@ -6115,11 +6126,11 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } else if (frameType == 3) { - //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); } else if (frameType == 4) { - //genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); } else { @@ -6710,7 +6721,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, regMaskTP regMask; regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers - //see: src/jit/registerloongarch64.h + // see: src/jit/registerloongarch64.h availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are // currently live availMask &= ~genRegMask(initReg); // Remove the pre-calculated 
initReg as we will zero it and maybe use it for @@ -6721,7 +6732,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // rAddr is not a live incoming argument reg assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); - assert(untrLclLo%4 == 0); + assert(untrLclLo % 4 == 0); if ((-2048 <= untrLclLo) && (untrLclLo < 2048)) { @@ -6815,7 +6826,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, } noway_assert(uCntBytes == 0); -#else // TARGET* +#else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* } @@ -7190,7 +7201,7 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed compiler->lvaCachedGenericContextArgOffset()); #elif defined(TARGET_LOONGARCH64) genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), - compiler->lvaCachedGenericContextArgOffset(), REG_R21); + compiler->lvaCachedGenericContextArgOffset(), REG_R21); #else // !ARM64 !ARM !LOONGARCH64 // mov [ebp-lvaCachedGenericContextArgOffset()], reg GetEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), @@ -9407,20 +9418,20 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - //if (validImmForBAL((ssize_t)addrInfo.addr)) - //{ - // // Simple direct call + // if (validImmForBAL((ssize_t)addrInfo.addr)) + //{ + // // Simple direct call - // //TODO for LA. - // callType = emitter::EC_FUNC_TOKEN; - // addr = addrInfo.addr; - // indCallReg = REG_NA; - // break; - //} + // //TODO for LA. + // callType = emitter::EC_FUNC_TOKEN; + // addr = addrInfo.addr; + // indCallReg = REG_NA; + // break; + //} - //// otherwise the target address doesn't fit in an immediate - //// so we have to burn a register... - //__fallthrough; + //// otherwise the target address doesn't fit in an immediate + //// so we have to burn a register... 
+ //__fallthrough; case IAT_PVALUE: // Load the address into a register, load indirect and call through a register @@ -9490,7 +9501,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) // https://github.com/dotnet/coreclr/issues/4827 // Do we need a special encoding for stack walker like rex.w prefix for x64? - //TODO for LA: whether the relative address is enough for optimize? + // TODO for LA: whether the relative address is enough for optimize? GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); } #endif // FEATURE_FASTTAILCALL @@ -10656,7 +10667,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass, CorInfoCallCo structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, callConv, &howToReturnStruct); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); #else return (varTypeIsStruct(returnType)); @@ -12206,16 +12217,16 @@ void CodeGen::genStructReturn(GenTree* treeNode) LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); #ifdef TARGET_LOONGARCH64 - var_types type = retTypeDesc.GetReturnRegType(0); - regNumber toReg = retTypeDesc.GetABIReturnReg(0); + var_types type = retTypeDesc.GetReturnRegType(0); + regNumber toReg = retTypeDesc.GetABIReturnReg(0); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0); if (regCount > 1) { assert(regCount == 2); int offset = genTypeSize(type); - type = retTypeDesc.GetReturnRegType(1); - offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; - toReg = retTypeDesc.GetABIReturnReg(1); + type = retTypeDesc.GetReturnRegType(1); + offset = offset < genTypeSize(type) ? 
genTypeSize(type) : offset; + toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } #else diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index e10e498466f52..92043be1edc67 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2516,10 +2516,10 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) } #ifdef TARGET_LOONGARCH64 - m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; - cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; + m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; + cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; #else - m_extendKind = COPY; + m_extendKind = COPY; #endif m_extendSrcSize = 4; } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index c5f407f3be311..be858336db73d 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -24,14 +24,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -static short splitLow(int value) { +static short splitLow(int value) +{ return (value & 0xffff); } // Returns true if 'value' is a legal signed immediate 16 bit encoding. static bool isValidSimm16(ssize_t value) { - return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); + return -(((int)1) << 15) <= value && value < (((int)1) << 15); }; // Returns true if 'value' is a legal unsigned immediate 16 bit encoding. @@ -43,7 +44,7 @@ static bool isValidUimm16(ssize_t value) // Returns true if 'value' is a legal signed immediate 12 bit encoding. 
static bool isValidSimm12(ssize_t value) { - return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); + return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; // Returns true if 'value' is a legal unsigned immediate 11 bit encoding. @@ -102,7 +103,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, regNumber tmpReg, bool inUnwindRegion /* = false */) { - emitAttr size = EA_SIZE(attr); + emitAttr size = EA_SIZE(attr); // reg1 is usually a dest register // reg2 is always source register @@ -260,26 +261,16 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, // generate addi.d SP,SP,-imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); - assert((spDelta+spOffset+16)<=0); + assert((spDelta + spOffset + 16) <= 0); - assert(spOffset <= 2031);//2047-16 + assert(spOffset <= 2031); // 2047-16 } -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_st_d == ins)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); -// compiler->unwindSaveRegPair(reg1, reg2, spOffset); -// } -// else -// #endif - { GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); - compiler->unwindSaveReg(reg2, spOffset+8); - } + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); } //------------------------------------------------------------------------ @@ -320,7 +311,6 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - } //------------------------------------------------------------------------ @@ -365,42 +355,23 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, if (spDelta != 0) { 
assert(!useSaveNextPair); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); -// compiler->unwindSaveRegPair(reg1, reg2, spOffset); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); - compiler->unwindSaveReg(reg2, spOffset+8); + + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - } // generate daddiu SP,SP,imm genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true); } else { -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(spOffset & 0xf) && (spOffset <= 0xff0) && (INS_ld_d == ins)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, reg2, reg1, REG_SPBASE, spOffset >> 4); -// compiler->unwindSaveRegPair(reg1, reg2, spOffset); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset+8); - compiler->unwindSaveReg(reg2, spOffset+8); + GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8); + compiler->unwindSaveReg(reg2, spOffset + 8); GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset); compiler->unwindSaveReg(reg1, spOffset); - } } } @@ -1064,8 +1035,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert((maskSaveRegsInt & RBM_RA) != 0); assert((maskSaveRegsInt & RBM_FP) != 0); - bool isFilter = (block->bbCatchTyp == BBCT_FILTER); - int frameSize = genFuncletInfo.fiSpDelta1; + bool isFilter = (block->bbCatchTyp == BBCT_FILTER); + int frameSize = genFuncletInfo.fiSpDelta1; regMaskTP maskArgRegsLiveIn; if (isFilter) @@ -1096,25 +1067,18 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 
2040); genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); - } maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); } else if (genFuncletInfo.fiFrameType == 2) { @@ -1125,34 +1089,25 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); + genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, + 0); } else if (genFuncletInfo.fiFrameType == 3) { // fiFrameType constraints: assert(frameSize 
< -2048); - offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; + offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -// #endif - { GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); compiler->unwindSaveReg(REG_RA, offset + 8); - } maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now @@ -1166,9 +1121,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // fiFrameType constraints: assert(frameSize < -2048); - offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); + offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); @@ -1201,8 +1156,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) regSet.verifyRegUsed(REG_A1); // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); // re-establish the frame pointer genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1, @@ -1217,8 +1172,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, 
false); regSet.verifyRegUsed(REG_A3); - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); + genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, + REG_A2, false); } } } @@ -1238,7 +1193,7 @@ void CodeGen::genFuncletEpilog() ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); bool unwindStarted = false; - int frameSize = genFuncletInfo.fiSpDelta1; + int frameSize = genFuncletInfo.fiSpDelta1; if (!unwindStarted) { @@ -1272,21 +1227,13 @@ void CodeGen::genFuncletEpilog() genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(genFuncletInfo.fiSP_to_FPRA_save_delta & 0xf) && (genFuncletInfo.fiSP_to_FPRA_save_delta <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta); -// } -// else -// #endif - { - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta + 8); compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, + genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); - } // generate daddiu SP,SP,imm genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); @@ -1307,36 +1254,26 @@ void CodeGen::genFuncletEpilog() // fiFrameType constraints: assert(frameSize < -2048); - - int offset = -frameSize - 
genFuncletInfo.fiSP_to_FPRA_save_delta; + int offset = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta; int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; - //first, generate daddiu SP,SP,imm + // first, generate daddiu SP,SP,imm genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); int offset2 = frameSize + SP_delta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8; - assert(offset2 < 2040);//can amend. + assert(offset2 < 2040); // can amend. regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0); -// #ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gslq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -// #endif - { GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8); compiler->unwindSaveReg(REG_RA, offset + 8); GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); - } - //second, generate daddiu SP,SP,imm for remaine space. + + // second, generate daddiu SP,SP,imm for remaine space. 
genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); } else if (genFuncletInfo.fiFrameType == 4) @@ -1344,9 +1281,9 @@ void CodeGen::genFuncletEpilog() // fiFrameType constraints: assert(frameSize < -2048); - int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); + int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; + offset = SP_delta - offset; genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); @@ -1397,9 +1334,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsPlusPSPSize; if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize/* -2*8*/; + saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize /* -2*8*/; else - saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize +PSPSize; + saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; if (compiler->info.compIsVarArgs) { @@ -1424,12 +1362,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES)); - if (maxFuncletFrameSizeAligned <= (2048-8)) + if (maxFuncletFrameSizeAligned <= (2048 - 8)) { if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { genFuncletInfo.fiFrameType = 1; - saveRegsPlusPSPSize -= 2*8;// FP/RA + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } else { @@ -1445,7 +1383,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { genFuncletInfo.fiFrameType = 3; - saveRegsPlusPSPSize -= 2*8;// FP/RA + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } else { @@ 
-1454,14 +1392,13 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } } - int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; - genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; - int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; + genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned; + int SP_to_PSP_slot_delta = funcletFrameSizeAligned - saveRegsPlusPSPSize; /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPRA_save_delta = SP_to_FPRA_save_delta; genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; @@ -1580,7 +1517,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) -{//maybe optimize. +{ // maybe optimize. emitter* emit = GetEmitter(); if (!compiler->opts.compReloc) @@ -1590,12 +1527,12 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, if (EA_IS_RELOC(size)) { - assert(genIsValidIntReg(reg));//TODO: maybe optimize!!! - emit->emitIns_R_AI(INS_bl, size, reg, imm);//for example: EA_PTR_DSP_RELOC + assert(genIsValidIntReg(reg)); // TODO: maybe optimize!!! + emit->emitIns_R_AI(INS_bl, size, reg, imm); // for example: EA_PTR_DSP_RELOC } else { - emit->emitIns_I_la(size, reg, imm);//TODO: maybe optimize. + emit->emitIns_I_la(size, reg, imm); // TODO: maybe optimize. 
} regSet.verifyRegUsed(reg); @@ -1618,10 +1555,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre GenTreeIntConCommon* con = tree->AsIntConCommon(); ssize_t cnsVal = con->IconValue(); - //if (con->ImmedValNeedsReloc(compiler)) + // if (con->ImmedValNeedsReloc(compiler)) if (con->ImmedValNeedsReloc(compiler) && compiler->opts.compReloc) { - //instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); assert(compiler->opts.compReloc); GetEmitter()->emitIns_R_AI(INS_bl, EA_HANDLE_CNS_RELOC, targetReg, cnsVal); regSet.verifyRegUsed(targetReg); @@ -1654,7 +1591,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre else { // Get a temp integer register to compute long address. - //regNumber addrReg = tree->GetSingleTempReg(); + // regNumber addrReg = tree->GetSingleTempReg(); // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. @@ -1747,12 +1684,11 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) // This method is expected to have called genConsumeOperands() before calling it. 
void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { - const genTreeOps oper = treeNode->OperGet(); - regNumber targetReg = treeNode->GetRegNum(); - emitter* emit = GetEmitter(); + const genTreeOps oper = treeNode->OperGet(); + regNumber targetReg = treeNode->GetRegNum(); + emitter* emit = GetEmitter(); - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || - oper == GT_OR || oper == GT_XOR); + assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || oper == GT_OR || oper == GT_XOR); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2(); @@ -1788,10 +1724,10 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) if (!isRegCandidate && !(tree->gtFlags & GTF_SPILLED)) { - var_types targetType = varDsc->GetRegisterType(tree); - //if (tree->gtFlags & GTF_UNSIGNED) + var_types targetType = varDsc->GetRegisterType(tree); + // if (tree->gtFlags & GTF_UNSIGNED) // targetType = varTypeSignedToUnsigned(targetType);//uuuuu. - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); // targetType must be a normal scalar type and not a TYP_STRUCT assert(targetType != TYP_STRUCT); @@ -1890,13 +1826,13 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) return; } - regNumber targetReg = lclNode->GetRegNum(); - emitter* emit = GetEmitter(); + regNumber targetReg = lclNode->GetRegNum(); + emitter* emit = GetEmitter(); unsigned varNum = lclNode->GetLclNum(); assert(varNum < compiler->lvaCount); - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - var_types targetType = varDsc->GetRegisterType(lclNode); + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + var_types targetType = varDsc->GetRegisterType(lclNode); if (lclNode->IsMultiReg()) { @@ -2055,7 +1991,9 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) } else { - if (attr == EA_4BYTE) // && op1->OperIs(GT_LCL_VAR) && (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) == EA_8BYTE)) + if (attr == EA_4BYTE) // && 
op1->OperIs(GT_LCL_VAR) && + // (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) == + // EA_8BYTE)) { if (treeNode->gtFlags & GTF_UNSIGNED) GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); @@ -2085,7 +2023,7 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); - BasicBlock* endLabel = nullptr;//can optimize for loongarch. + BasicBlock* endLabel = nullptr; // can optimize for loongarch. unsigned stackAdjustment = 0; const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1; target_ssize_t lastTouchDelta = @@ -2160,10 +2098,9 @@ void CodeGen::genLclHeap(GenTree* tree) if (compiler->lvaOutgoingArgSpaceSize > 0) { unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); - //assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain + // assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain // // aligned - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, - rsGetRsvdReg()); + genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, outgoingArgSpaceAligned, rsGetRsvdReg()); stackAdjustment += outgoingArgSpaceAligned; } @@ -2206,7 +2143,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SP, 0); lastTouchDelta = amount; - imm = -(ssize_t)amount; + imm = -(ssize_t)amount; assert(-8192 <= imm && imm < 0); if (-2048 <= imm && imm < 0) emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); @@ -2259,7 +2196,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(type), regCnt, regCnt, -16); - assert(imm == (-4 << 2));//goto loop. + assert(imm == (-4 << 2)); // goto loop. 
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); lastTouchDelta = 0; @@ -2307,10 +2244,10 @@ void CodeGen::genLclHeap(GenTree* tree) // Overflow, set regCnt to lowest possible value emit->emitIns_R_R_R(INS_masknez, EA_PTRSIZE, regCnt, regCnt, REG_R21); - assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize()>>12)<<12)); - emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize()>>12); + assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12)); + emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12); - //genDefineTempLabel(loop); + // genDefineTempLabel(loop); // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, REG_SPBASE, 0); @@ -2320,7 +2257,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(regTmp != REG_R21); - ssize_t imm = 3 << 2;//goto done. + ssize_t imm = 3 << 2; // goto done. emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, REG_R21, regCnt, imm); emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp); @@ -2330,7 +2267,7 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_I(INS_b, EA_PTRSIZE, imm); // Done with stack tickle loop - //genDefineTempLabel(done); + // genDefineTempLabel(done); // Now just move the final value to SP emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); @@ -2430,7 +2367,7 @@ void CodeGen::genCodeForBswap(GenTree* tree) // tree - the node // void CodeGen::genCodeForDivMod(GenTreeOp* tree) -{//can amend further. +{ // can amend further. 
assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); var_types targetType = tree->TypeGet(); @@ -2444,9 +2381,10 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) assert(varTypeIsFloating(tree->gtOp1)); assert(varTypeIsFloating(tree->gtOp2)); assert(tree->gtOper == GT_DIV); - //genCodeForBinary(tree); + // genCodeForBinary(tree); instruction ins = genGetInsForOper(tree); - emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), tree->gtOp2->GetRegNum()); + emit->emitIns_R_R_R(ins, emitActualTypeSize(targetType), tree->GetRegNum(), tree->gtOp1->GetRegNum(), + tree->gtOp2->GetRegNum()); } else // an integer divide operation { @@ -2461,23 +2399,23 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) } else // the divisor is not the constant zero { - GenTree* src1 = tree->gtOp1; + GenTree* src1 = tree->gtOp1; unsigned TypeSize = genTypeSize(genActualType(tree->TypeGet())); - emitAttr size = EA_ATTR(TypeSize); + emitAttr size = EA_ATTR(TypeSize); - assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) - && TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); + assert(TypeSize >= genTypeSize(genActualType(src1->TypeGet())) && + TypeSize >= genTypeSize(genActualType(divisorOp->TypeGet()))); - //ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; - regNumber Reg1 = src1->GetRegNum(); - regNumber divisorReg = divisorOp->GetRegNum(); + // ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; + regNumber Reg1 = src1->GetRegNum(); + regNumber divisorReg = divisorOp->GetRegNum(); instruction ins; // Check divisorOp first as we can always allow it to be a contained immediate if (divisorOp->isContainedIntOrIImmed()) { ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); - divisorReg = REG_R21; + divisorReg = REG_R21; if ((-2048 <= intConst) && (intConst <= 0x7ff)) emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); else @@ -2497,7 +2435,7 @@ void 
CodeGen::genCodeForDivMod(GenTreeOp* tree) { assert(!divisorOp->isContainedIntOrIImmed()); ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); - Reg1 = REG_R21; + Reg1 = REG_R21; if ((-2048 <= intConst) && (intConst <= 0x7ff)) emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); else @@ -2527,7 +2465,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) if (divisorOp->IsCnsIntOrI()) { ssize_t intConstValue = divisorOp->AsIntCon()->gtIconVal; - //assert(intConstValue != 0); // already checked above by IsIntegralConst(0) + // assert(intConstValue != 0); // already checked above by IsIntegralConst(0) if (intConstValue != -1) { checkDividend = false; // We statically know that the dividend is not -1 @@ -2543,7 +2481,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) { // Check if the divisor is not -1 branch to 'sdivLabel' emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, -1); - BasicBlock* sdivLabel = genCreateTempLabel();//can optimize for loongarch64. + BasicBlock* sdivLabel = genCreateTempLabel(); // can optimize for loongarch64. emit->emitIns_J_cond_la(INS_bne, sdivLabel, REG_R21, divisorReg); // If control flow continues past here the 'divisorReg' is known to be -1 @@ -2555,7 +2493,8 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) emit->emitIns_J_cond_la(INS_beq, sdivLabel, dividendReg, REG_R0); - emit->emitIns_R_R_R(size == EA_4BYTE ? INS_add_w : INS_add_d, size, REG_R21, dividendReg, dividendReg); + emit->emitIns_R_R_R(size == EA_4BYTE ? 
INS_add_w : INS_add_d, size, REG_R21, dividendReg, + dividendReg); genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, REG_R21); genDefineTempLabel(sdivLabel); } @@ -2582,7 +2521,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); } - else //if (tree->gtOper == GT_UDIV) GT_UMOD + else // if (tree->gtOper == GT_UDIV) GT_UMOD { // Only one possible exception // (AnyVal / 0) => DivideByZeroException @@ -2607,7 +2546,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) else ins = INS_mod_wu; - //TODO: temp workround, should amend for optimize. + // TODO: temp workround, should amend for optimize. emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0); } @@ -2846,10 +2785,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); emit->emitIns_R_R_I(INS_ld_d, attr1, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + 2 * TARGET_POINTER_SIZE); emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); emit->emitIns_R_R_I(INS_st_d, attr1, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + 2 * TARGET_POINTER_SIZE); i += 2; } @@ -2859,9 +2800,11 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) emitAttr attr0 = emitTypeSize(layout->GetGCPtrType(i + 0)); emit->emitIns_R_R_I(INS_ld_d, attr0, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); - 
emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, + TARGET_POINTER_SIZE); emit->emitIns_R_R_I(INS_st_d, attr0, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, + TARGET_POINTER_SIZE); } } else @@ -2878,18 +2821,22 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, 2 * TARGET_POINTER_SIZE); emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, 2 * TARGET_POINTER_SIZE); ++i; // extra increment of i, since we are copying two items } else { emit->emitIns_R_R_I(INS_ld_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); - emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrSrcAddr, REG_WRITE_BARRIER_SRC_BYREF, + REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); 
emit->emitIns_R_R_I(INS_st_d, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); - emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(INS_addi_d, attrDstAddr, REG_WRITE_BARRIER_DST_BYREF, + REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); } } else @@ -2931,7 +2878,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0); // add it to the absolute address of fgFirstBB - //compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64. + // compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64. GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg); @@ -3000,17 +2947,17 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) static inline bool isImmed(GenTree* treeNode) { - if (treeNode->gtGetOp1()->isContainedIntOrIImmed()) - { + if (treeNode->gtGetOp1()->isContainedIntOrIImmed()) + { + return true; + } + else if (treeNode->OperIsBinary()) + { + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) return true; - } - else if (treeNode->OperIsBinary()) - { - if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) - return true; - } + } - return false; + return false; } instruction CodeGen::genGetInsForOper(GenTree* treeNode) @@ -3019,8 +2966,8 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) genTreeOps oper = treeNode->OperGet(); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2; - emitAttr attr = emitActualTypeSize(treeNode); - bool isImm = false; + emitAttr attr = emitActualTypeSize(treeNode); + bool isImm = false; instruction ins = INS_break; @@ -3161,7 +3108,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) case GT_MUL: if ((attr == EA_8BYTE) || (attr == EA_BYREF)) { - op2 = treeNode->gtGetOp2(); + op2 = treeNode->gtGetOp2(); if (genActualTypeIsInt(op1) && 
genActualTypeIsInt(op2)) ins = treeNode->IsUnsigned() ? INS_mulw_d_wu : INS_mulw_d_w; else @@ -3220,7 +3167,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_slli_w; else @@ -3239,7 +3186,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_srli_w; else @@ -3258,7 +3205,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_srai_w; else @@ -3277,7 +3224,7 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) isImm = isImmed(treeNode); if (isImm) { - //it's better to check sa. + // it's better to check sa. if (attr == EA_4BYTE) ins = INS_rotri_w; else @@ -3332,45 +3279,45 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) BasicBlock* skipLabel = genCreateTempLabel(); GetEmitter()->emitIns_J_cond_la(INS_beq, skipLabel, data->GetRegNum(), REG_R0); - void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); + void* pAddr = nullptr; + void* addr = compiler->compGetHelperFtn(CORINFO_HELP_STOP_FOR_GC, &pAddr); emitter::EmitCallType callType; - regNumber callTarget; + regNumber callTarget; if (addr == nullptr) { - callType = emitter::EC_INDIR_R; + callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); if (compiler->opts.compReloc) { GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); - 
//GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); } regSet.verifyRegUsed(callTarget); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); } else { - callType = emitter::EC_FUNC_TOKEN; + callType = emitter::EC_FUNC_TOKEN; callTarget = REG_NA; } ////TODO: can optimize further !!! 
- GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), INDEBUG_LDISASM_COMMA(nullptr) addr, 0, - EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ + GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ ); genDefineTempLabel(skipLabel); @@ -3443,8 +3390,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) dataReg = data->GetRegNum(); } - var_types type = tree->TypeGet(); - instruction ins = ins_Store(type); + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { @@ -3491,27 +3438,27 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) assert(genIsValidFloatReg(targetReg)); GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); // Cannot be contained + assert(!op1->isContained()); // Cannot be contained assert(genIsValidIntReg(op1->GetRegNum())); // Must be a valid int reg. 
var_types dstType = treeNode->CastToType(); var_types srcType = genActualType(op1->TypeGet()); assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - emitter *emit = GetEmitter(); + emitter* emit = GetEmitter(); emitAttr attr = emitActualTypeSize(dstType); // We should never see a srcType whose size is neither EA_4BYTE or EA_8BYTE emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE)); - bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; - instruction ins = INS_invalid; + bool IsUnsigned = treeNode->gtFlags & GTF_UNSIGNED; + instruction ins = INS_invalid; genConsumeOperands(treeNode->AsOp()); if (IsUnsigned) - {//should amend. + { // should amend. emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1 if (srcSize == EA_8BYTE) @@ -3571,8 +3518,10 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) ssize_t imm = 3 << 2; emit->emitIns_R_R_I(INS_bge, EA_8BYTE, op1->GetRegNum(), REG_R0, imm); - emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, treeNode->GetRegNum()); - emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), REG_SCRATCH_FLT, treeNode->GetRegNum()); + emit->emitIns_R_R(dstType == TYP_DOUBLE ? INS_fmov_d : INS_fmov_s, attr, REG_SCRATCH_FLT, + treeNode->GetRegNum()); + emit->emitIns_R_R_R(dstType == TYP_DOUBLE ? INS_fadd_d : INS_fadd_s, attr, treeNode->GetRegNum(), + REG_SCRATCH_FLT, treeNode->GetRegNum()); } } @@ -3604,7 +3553,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) assert(genIsValidIntReg(targetReg)); // Must be a valid int reg. GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); // Cannot be contained + assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. 
var_types dstType = treeNode->CastToType(); @@ -3618,9 +3567,9 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE)); - instruction ins1 = INS_invalid; - instruction ins2 = INS_invalid; - bool IsUnsigned = varTypeIsUnsigned(dstType); + instruction ins1 = INS_invalid; + instruction ins2 = INS_invalid; + bool IsUnsigned = varTypeIsUnsigned(dstType); regNumber tmpReg = REG_SCRATCH_FLT; assert(tmpReg != op1->GetRegNum()); @@ -3688,7 +3637,8 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) //{ // GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, tmpReg, REG_R0); - // GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2); + // GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), + // tmpReg, 2); // GetEmitter()->emitIns_I_I(INS_bc1f, EA_PTRSIZE, 2, 4 << 2); // GetEmitter()->emitIns_R_R_I(INS_ori*/, EA_PTRSIZE, treeNode->GetRegNum(), REG_R0, 0); @@ -3702,15 +3652,18 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) GetEmitter()->emitIns_R_R(srcType == TYP_DOUBLE ? INS_movgr2fr_d : INS_movgr2fr_w, EA_8BYTE, tmpReg, REG_R21); - GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), tmpReg, 2); + GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_fcmp_clt_d : INS_fcmp_clt_s, EA_8BYTE, op1->GetRegNum(), + tmpReg, 2); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 0); GetEmitter()->emitIns_I_I(INS_bcnez, EA_PTRSIZE, 2, 4 << 2); - GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), tmpReg); + GetEmitter()->emitIns_R_R_R(srcType == TYP_DOUBLE ? 
INS_fsub_d : INS_fsub_s, EA_8BYTE, tmpReg, op1->GetRegNum(), + tmpReg); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, 1); - GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, dstSize == EA_8BYTE ? 63 : 31); + GetEmitter()->emitIns_R_R_I(dstSize == EA_8BYTE ? INS_slli_d : INS_slli_w, EA_PTRSIZE, REG_R21, REG_R21, + dstSize == EA_8BYTE ? 63 : 31); GetEmitter()->emitIns_R_R_R_I(INS_fsel, EA_PTRSIZE, tmpReg, tmpReg, op1->GetRegNum(), 2); @@ -3744,11 +3697,11 @@ void CodeGen::genCkfinite(GenTree* treeNode) { assert(treeNode->OperGet() == GT_CKFINITE); - GenTree* op1 = treeNode->AsOp()->gtOp1; - var_types targetType = treeNode->TypeGet(); - ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. - ssize_t size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. - ssize_t pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. + GenTree* op1 = treeNode->AsOp()->gtOp1; + var_types targetType = treeNode->TypeGet(); + ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. + ssize_t size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. + ssize_t pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. emitter* emit = GetEmitter(); emitAttr attr = emitActualTypeSize(treeNode); @@ -3761,7 +3714,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Mask of exponent with all 1's and check if the exponent is all 1's instruction ins = (targetType == TYP_FLOAT) ? 
INS_bstrpick_w : INS_bstrpick_d; - emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos+size-1, pos); + emit->emitIns_R_R_I_I(ins, EA_PTRSIZE, intReg, intReg, pos + size - 1, pos); emit->emitIns_R_R_I(INS_xori, attr, intReg, intReg, expMask); genJumpToThrowHlpBlk_la(SCK_ARITH_EXCPN, INS_beq, intReg); @@ -3785,19 +3738,19 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emitter* emit = GetEmitter(); GenTreeOp* tree = nullptr; - regNumber targetReg; + regNumber targetReg; if (jtree->OperIs(GT_JTRUE)) { - tree = jtree->gtGetOp1()->AsOp(); + tree = jtree->gtGetOp1()->AsOp(); targetReg = REG_RA; assert(tree->GetRegNum() == REG_NA); - jtree->gtOp2 = (GenTree*)REG_RA;//targetReg + jtree->gtOp2 = (GenTree*)REG_RA; // targetReg jtree->SetRegNum((regNumber)INS_bnez); } else { - tree = jtree; + tree = jtree; targetReg = tree->GetRegNum(); } assert(targetReg != REG_NA); @@ -3821,39 +3774,51 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; - if(IsUnordered) - { - if(tree->OperIs(GT_LT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_LE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_EQ)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_NE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? 
INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + if (IsUnordered) + { + if (tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); } else { - if(tree->OperIs(GT_LT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_LE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_EQ)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_NE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GT)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? 
INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); - else if(tree->OperIs(GT_GE)) - emit->emitIns_R_R_I(cmpSize==EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1/*cc*/); + if (tree->OperIs(GT_LT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_LE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_EQ)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_NE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GT)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); + else if (tree->OperIs(GT_GE)) + emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), + op1->GetRegNum(), 1 /*cc*/); } emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); - emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1/*cc*/); + emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1 /*cc*/); } else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) { @@ -3862,72 +3827,72 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - instruction ins = INS_beqz; + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + instruction ins = INS_beqz; switch (cmpSize) { - case EA_4BYTE: + case EA_4BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - case EA_8BYTE: - break; - case EA_1BYTE: + case EA_8BYTE: + break; + case EA_1BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpCompare."); + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpCompare."); } switch (tree->OperGet()) { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - break; - default: - break; + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && 
((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; } assert(ins != INS_invalid); @@ -3936,34 +3901,34 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else { - //TODO:can optimize further. + // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; op2 = tree->gtOp1; switch (tree->OperGet()) { - case GT_LT: - tree->SetOper(GT_GT); - break; - case GT_LE: - tree->SetOper(GT_GE); - break; - case GT_GT: - tree->SetOper(GT_LT); - break; - case GT_GE: - tree->SetOper(GT_LE); - break; - default: - break; + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; } } assert(!op1->isContainedIntOrIImmed()); assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - regNumber regOp1 = op1->GetRegNum(); + bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; + regNumber regOp1 = op1->GetRegNum(); if (op2->isContainedIntOrIImmed()) { @@ -3972,71 +3937,84 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { switch (cmpSize) { - case EA_4BYTE: - imm = static_cast(imm); - break; - case EA_8BYTE: - break; - case EA_1BYTE: - imm = static_cast(imm); - break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected 
type in jumpTrue(imm)."); + case EA_4BYTE: + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); } } if (tree->OperIs(GT_LT)) { - if (!IsUnsigned && isValidSimm12(imm)) { + if (!IsUnsigned && isValidSimm12(imm)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) { + else if (IsUnsigned && isValidUimm11(imm)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); } } else if (tree->OperIs(GT_LE)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) { + if (!IsUnsigned && isValidSimm12(imm + 1)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) { + else if (IsUnsigned && isValidUimm11(imm + 1)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm + 1); emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); } } else if (tree->OperIs(GT_GT)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) { + if (!IsUnsigned && isValidSimm12(imm + 1)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) { + else if (IsUnsigned && isValidUimm11(imm + 1)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(IsUnsigned ? 
INS_sltu : INS_slt, EA_PTRSIZE, targetReg, REG_RA, regOp1); } } else if (tree->OperIs(GT_GE)) - { if (!IsUnsigned && isValidSimm12(imm)) { + { + if (!IsUnsigned && isValidSimm12(imm)) + { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) { + else if (IsUnsigned && isValidUimm11(imm)) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(IsUnsigned ? INS_sltu : INS_slt, EA_PTRSIZE, targetReg, regOp1, REG_RA); } @@ -4044,14 +4022,17 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_NE)) { - if (!imm) { + if (!imm) + { emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); } - else if (isValidUimm12(imm)) { + else if (isValidUimm12(imm)) + { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); @@ -4059,14 +4040,17 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_EQ)) { - if (!imm) { + if (!imm) + { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); } - else if (isValidUimm12(imm)) { + else if (isValidUimm12(imm)) + { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } - else { + else + { emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, REG_RA); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); @@ -4079,7 +4063,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { regNumber tmpRegOp1 = tree->ExtractTempReg(); regNumber tmpRegOp2 = tree->ExtractTempReg(); - 
regNumber regOp2 = op2->GetRegNum(); + regNumber regOp2 = op2->GetRegNum(); if (cmpSize == EA_4BYTE) { regOp1 = tmpRegOp1; @@ -4138,26 +4122,27 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) // void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { - //assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. + // assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. ////assert(jtrue->OperIs(GT_JTRUE)); emitter* emit = GetEmitter(); - GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; - regNumber targetReg = tree->GetRegNum(); - instruction ins = INS_invalid; + GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; + regNumber targetReg = tree->GetRegNum(); + instruction ins = INS_invalid; if (jtrue->OperIs(GT_JTRUE) && jtrue->gtOp2) { - emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, (int)(int64_t)jtrue->gtOp2);//5-bits; + emit->emitIns_J((instruction)jtrue->GetRegNum(), compiler->compCurBB->bbJumpDest, + (int)(int64_t)jtrue->gtOp2); // 5-bits; jtrue->SetRegNum(REG_NA); jtrue->gtOp2 = nullptr; return; } else { - GenTree* op1 = tree->gtOp1; - GenTree* op2 = tree->gtOp2; + GenTree* op1 = tree->gtOp1; + GenTree* op2 = tree->gtOp2; var_types op1Type = genActualType(op1->TypeGet()); var_types op2Type = genActualType(op2->TypeGet()); @@ -4177,7 +4162,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (varTypeIsFloating(op1Type)) { assert(genTypeSize(op1Type) == genTypeSize(op2Type)); - //int cc = 1; + // int cc = 1; assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; @@ -4186,57 +4171,69 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { ins = INS_bcnez; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cueq_s : INS_fcmp_ceq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cueq_d : INS_fcmp_ceq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_NE)) { ins = INS_bceqz; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_s : INS_fcmp_cueq_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_ceq_d : INS_fcmp_cueq_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_LT)) { ins = INS_bcnez; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_s : INS_fcmp_clt_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cult_d : INS_fcmp_clt_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_LE)) { ins = INS_bcnez; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_s : INS_fcmp_cle_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cule_d : INS_fcmp_cle_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_GE)) { ins = INS_bceqz; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_s : INS_fcmp_cult_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_clt_d : INS_fcmp_cult_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } else if (tree->OperIs(GT_GT)) { ins = INS_bceqz; if (cmpSize == EA_4BYTE) - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_s : INS_fcmp_cule_s, EA_4BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); else - emit->emitIns_R_R_I(IsUnordered ? INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), op2->GetRegNum(), 1/*cc*/); + emit->emitIns_R_R_I(IsUnordered ? 
INS_fcmp_cle_d : INS_fcmp_cule_d, EA_8BYTE, op1->GetRegNum(), + op2->GetRegNum(), 1 /*cc*/); } - //assert(0 <= cc && cc < 8); + // assert(0 <= cc && cc < 8); if (IsEq) - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1/*cc*/);//5-bits; + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1 /*cc*/); // 5-bits; else { - jtrue->gtOp2 = (GenTree*)(1/*cc*/); + jtrue->gtOp2 = (GenTree*)(1 /*cc*/); jtrue->SetRegNum((regNumber)ins); } } @@ -4251,70 +4248,70 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) switch (cmpSize) { - case EA_4BYTE: + case EA_4BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - case EA_8BYTE: - break; - case EA_1BYTE: + case EA_8BYTE: + break; + case EA_1BYTE: { imm1 = static_cast(imm1); imm2 = static_cast(imm2); } break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpTrue."); + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue."); } switch (tree->OperGet()) { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - break; - default: - break; + case GT_LT: + if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_LE: + if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && 
((unsigned)imm1 <= (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_EQ: + if (imm1 == imm2) + { + ins = INS_b; + } + break; + case GT_NE: + if (imm1 != imm2) + { + ins = INS_b; + } + break; + case GT_GT: + if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) + { + ins = INS_b; + } + break; + case GT_GE: + if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) + { + ins = INS_b; + } + break; + default: + break; } if (IsEq && (ins != INS_invalid)) - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0);//5-bits; + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0); // 5-bits; else if (ins != INS_invalid) { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -4323,27 +4320,27 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else { - //TODO:can optimize further. + // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; op2 = tree->gtOp1; switch (tree->OperGet()) { - case GT_LT: - tree->SetOper(GT_GT); - break; - case GT_LE: - tree->SetOper(GT_GE); - break; - case GT_GT: - tree->SetOper(GT_LT); - break; - case GT_GE: - tree->SetOper(GT_LE); - break; - default: - break; + case GT_LT: + tree->SetOper(GT_GT); + break; + case GT_LE: + tree->SetOper(GT_GE); + break; + case GT_GT: + tree->SetOper(GT_LT); + break; + case GT_GE: + tree->SetOper(GT_LE); + break; + default: + break; } } @@ -4361,52 +4358,58 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { switch (cmpSize) { - case EA_4BYTE: - if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) - imm = static_cast(imm); - else - imm = static_cast(imm); - break; - case EA_8BYTE: - break; - case EA_1BYTE: - imm = static_cast(imm); - break; - //case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpTrue(imm)."); + case EA_4BYTE: + if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + imm = static_cast(imm); + else + 
imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); } - emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm);//TODO: maybe optimize. + emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); // TODO: maybe optimize. } else { SaveCcResultReg = 0; } - if (tree->OperIs(GT_LT)) { + if (tree->OperIs(GT_LT)) + { SaveCcResultReg |= ((int)regOp1); ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_LE)) { + else if (tree->OperIs(GT_LE)) + { SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); - ins = IsUnsigned ? INS_bgeu : INS_bge; + ins = IsUnsigned ? INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_GT)) { + else if (tree->OperIs(GT_GT)) + { SaveCcResultReg = imm ? ((((int)regOp1) << 5) | (int)REG_RA) : (((int)regOp1) << 5); - ins = IsUnsigned ? INS_bltu : INS_blt; + ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_GE)) { + else if (tree->OperIs(GT_GE)) + { SaveCcResultReg |= ((int)regOp1); ins = IsUnsigned ? INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_NE)) { + else if (tree->OperIs(GT_NE)) + { SaveCcResultReg |= ((int)regOp1); ins = INS_bne; } - else if (tree->OperIs(GT_EQ)) { + else if (tree->OperIs(GT_EQ)) + { SaveCcResultReg |= ((int)regOp1); ins = INS_beq; } @@ -4414,29 +4417,32 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) else { regNumber regOp2 = op2->GetRegNum(); - if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - {//TODO: should amend further!!! + if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { // TODO: should amend further!!! 
regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); regOp1 = tmpRegOp1; regOp2 = REG_RA; } - else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - {//TODO: should amend further!!! + else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { // TODO: should amend further!!! regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); regOp1 = tmpRegOp1; regOp2 = REG_RA; } - else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - {//TODO: should amend further!!! + else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && + compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) + { // TODO: should amend further!!! emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); regOp2 = REG_RA; } else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) - {//TODO: should amend further!!! + { // TODO: should amend further!!! if (!(op1->gtFlags & GTF_UNSIGNED)) { regNumber tmpRegOp1 = tree->ExtractTempReg(); @@ -4450,34 +4456,40 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } } - if (tree->OperIs(GT_LT)) { + if (tree->OperIs(GT_LT)) + { SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); - ins = IsUnsigned ? INS_bltu : INS_blt; + ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_LE)) { + else if (tree->OperIs(GT_LE)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = IsUnsigned ? INS_bgeu : INS_bge; + ins = IsUnsigned ? 
INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_GT)) { + else if (tree->OperIs(GT_GT)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = IsUnsigned ? INS_bltu : INS_blt; + ins = IsUnsigned ? INS_bltu : INS_blt; } - else if (tree->OperIs(GT_GE)) { + else if (tree->OperIs(GT_GE)) + { SaveCcResultReg = ((int)regOp1 | ((int)regOp2 << 5)); - ins = IsUnsigned ? INS_bgeu : INS_bge; + ins = IsUnsigned ? INS_bgeu : INS_bge; } - else if (tree->OperIs(GT_NE)) { + else if (tree->OperIs(GT_NE)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = INS_bne; + ins = INS_bne; } - else if (tree->OperIs(GT_EQ)) { + else if (tree->OperIs(GT_EQ)) + { SaveCcResultReg = (((int)regOp1) << 5) | (int)regOp2; - ins = INS_beq; + ins = INS_beq; } } if (IsEq) - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg);//5-bits; + emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg); // 5-bits; else { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -4533,7 +4545,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) regNumber reg = op1->GetRegNum(); emitAttr attr = emitActualTypeSize(op1->TypeGet()); - //if (tree->gtFlags & GTF_JCMP_TST) + // if (tree->gtFlags & GTF_JCMP_TST) //{ // assert(!"unimplemented on LOONGARCH yet"); // //ssize_t compareImm = op2->AsIntCon()->IconValue(); @@ -4545,10 +4557,10 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) // //GetEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); //} - //else + // else { instruction ins; - int regs; + int regs; if (op2->AsIntCon()->gtIconVal) { assert(reg != REG_R21); @@ -4562,16 +4574,16 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) } GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); regs = (int)reg << 5; - regs |= (int)REG_R21;//REG_R21 + regs |= (int)REG_R21; // REG_R21 ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; } else { regs = (int)reg; - ins = (tree->gtFlags & GTF_JCMP_EQ) ? 
INS_beqz : INS_bnez; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; } - GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs);//5-bits; + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; } } @@ -4586,10 +4598,10 @@ int CodeGenInterface::genSPtoFPdelta() const int delta; if (IsSaveFpRaWithAllCalleeSavedRegisters()) { - //delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; - //assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); + // delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; + // assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - (compiler->compCalleeRegsPushed -1)* REGSIZE_BYTES; + (compiler->compCalleeRegsPushed - 1) * REGSIZE_BYTES; } else { @@ -4709,19 +4721,19 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, callTarget = callTargetReg; - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); if (compiler->opts.compReloc) { GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, 
((ssize_t)pAddr & 0xfffff000) >> 12); GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); } regSet.verifyRegUsed(callTarget); @@ -4731,9 +4743,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ ); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper); @@ -5377,7 +5389,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) treeNode_next = treeNode_next->gtNext; }; assert(treeNode_next->OperIs(GT_JTRUE)); - //genCodeForJumpTrue(treeNode_next->AsOp()); + // genCodeForJumpTrue(treeNode_next->AsOp()); genCodeForCompare(treeNode_next->AsOp()); } break; @@ -5502,7 +5514,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0); - // the runtime side requires the codegen here to be consistent +// the runtime side requires the codegen here to be consistent #ifdef PSEUDORANDOM_NOP_INSERTION emit->emitDisableRandomNops(); #endif // PSEUDORANDOM_NOP_INSERTION @@ -5600,19 +5612,24 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) } else { - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); + // 
instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, initReg, initReg, 0); if (compiler->opts.compReloc) { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); } else { - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000)>>12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff)>>2); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, initReg, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, initReg, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, initReg, initReg, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); } regSet.verifyRegUsed(initReg); GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); @@ -5718,7 +5735,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) if (storeIns == INS_st_w) { emit->emitIns_R_R_R(INS_add_w, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0); - storeIns = INS_st_d; + storeIns = INS_st_d; storeAttr = EA_8BYTE; } 
emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut); @@ -5743,7 +5760,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // Setup loReg from the internal registers that we reserved in lower. // - regNumber loReg = treeNode->ExtractTempReg(); + regNumber loReg = treeNode->ExtractTempReg(); regNumber addrReg = REG_NA; GenTreeLclVarCommon* varNode = nullptr; @@ -5787,11 +5804,11 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) ClassLayout* layout; - //unsigned gcPtrCount; // The count of GC pointers in the struct + // unsigned gcPtrCount; // The count of GC pointers in the struct unsigned srcSize; bool isHfa; - //gcPtrCount = treeNode->gtNumSlots; + // gcPtrCount = treeNode->gtNumSlots; // Setup the srcSize and layout if (source->OperGet() == GT_LCL_VAR) { @@ -5804,8 +5821,8 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) assert(varDsc->lvOnFrame && !varDsc->lvRegister); srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine - // as that is how much stack is allocated for this LclVar - layout = varDsc->GetLayout(); + // as that is how much stack is allocated for this LclVar + layout = varDsc->GetLayout(); } else // we must have a GT_OBJ { @@ -5946,7 +5963,7 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) #if 1 else if (emitter::isFloatReg(targetReg)) GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); - else //if (!emitter::isFloatReg(targetReg)) + else // if (!emitter::isFloatReg(targetReg)) { assert(!emitter::isFloatReg(targetReg)); GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); @@ -6088,7 +6105,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) // We don't split HFA struct assert(!compiler->IsHfa(source->AsObj()->GetLayout()->GetClassHandle())); - } + } ClassLayout* layout = source->AsObj()->GetLayout(); @@ -6195,7 +6212,7 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) genConsumeRegs(op1); 
const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); if (treeNode->GetRegNum() != REG_NA) { @@ -6251,9 +6268,9 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) else { // Stack store - int offset = 0; - var_types type = pRetTypeDesc->GetReturnRegType(0); - regNumber reg = call->GetRegNumByIdx(0); + int offset = 0; + var_types type = pRetTypeDesc->GetReturnRegType(0); + regNumber reg = call->GetRegNumByIdx(0); if (op1->IsCopyOrReload()) { // GT_COPY/GT_RELOAD will have valid reg for those positions @@ -6271,8 +6288,8 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) if (1 < regCount) { offset = genTypeSize(type); - type = pRetTypeDesc->GetReturnRegType(1); - reg = call->GetRegNumByIdx(1); + type = pRetTypeDesc->GetReturnRegType(1); + reg = call->GetRegNumByIdx(1); offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); } @@ -6304,7 +6321,7 @@ void CodeGen::genRangeCheck(GenTree* oper) genConsumeRegs(arrIndex); genConsumeRegs(arrLen); - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); GenTreeIntConCommon* intConst = nullptr; if (arrIndex->isContainedIntOrIImmed()) { @@ -6313,7 +6330,7 @@ void CodeGen::genRangeCheck(GenTree* oper) reg1 = REG_R21; reg2 = src1->GetRegNum(); - intConst = src2->AsIntConCommon(); + intConst = src2->AsIntConCommon(); ssize_t imm = intConst->IconValue(); if (imm == INT64_MAX) { @@ -6333,7 +6350,7 @@ void CodeGen::genRangeCheck(GenTree* oper) if (src2->isContainedIntOrIImmed()) { - reg2 = REG_R21; + reg2 = REG_R21; ssize_t imm = src2->AsIntConCommon()->IconValue(); emit->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); } @@ -6421,12 +6438,12 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) // We will use a temp register to load the lower bound and dimension size values. 
- //regNumber tmpReg = arrIndex->GetSingleTempReg(); + // regNumber tmpReg = arrIndex->GetSingleTempReg(); assert(tgtReg != REG_R21); - unsigned dim = arrIndex->gtCurrDim; - unsigned rank = arrIndex->gtArrRank; - unsigned offset; + unsigned dim = arrIndex->gtCurrDim; + unsigned rank = arrIndex->gtArrRank; + unsigned offset; offset = compiler->eeGetMDArrayLowerBoundOffset(rank, dim); emit->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, arrReg, offset); @@ -6473,11 +6490,11 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) noway_assert(indexReg != REG_NA); noway_assert(arrReg != REG_NA); - //regNumber tmpReg = arrOffset->GetSingleTempReg(); + // regNumber tmpReg = arrOffset->GetSingleTempReg(); - unsigned dim = arrOffset->gtCurrDim; - unsigned rank = arrOffset->gtArrRank; - unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim); + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + unsigned offset = compiler->eeGetMDArrayLengthOffset(rank, dim); // Load tmpReg with the dimension size and evaluate // tgtReg = offsetReg*tmpReg + indexReg. @@ -6508,10 +6525,10 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) // void CodeGen::genCodeForShift(GenTree* tree) { - //var_types targetType = tree->TypeGet(); - //genTreeOps oper = tree->OperGet(); - instruction ins = genGetInsForOper(tree); - emitAttr size = emitActualTypeSize(tree); + // var_types targetType = tree->TypeGet(); + // genTreeOps oper = tree->OperGet(); + instruction ins = genGetInsForOper(tree); + emitAttr size = emitActualTypeSize(tree); assert(tree->GetRegNum() != REG_NA); @@ -6527,7 +6544,7 @@ void CodeGen::genCodeForShift(GenTree* tree) { unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal; - //should check shiftByImm for loongarch32-ins. + // should check shiftByImm for loongarch32-ins. 
unsigned immWidth = emitter::getBitWidth(size); // For LOONGARCH64, immWidth will be set to 32 or 64 shiftByImm &= (immWidth - 1); @@ -6632,7 +6649,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // The index is never contained, even if it is a constant. assert(index->isUsedFromReg()); - //const regNumber tmpReg = node->GetSingleTempReg(); + // const regNumber tmpReg = node->GetSingleTempReg(); // Generate the bounds check if necessary. if ((node->gtFlags & GTF_INX_RNGCHK) != 0) @@ -6711,9 +6728,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) instruction ins = ins_Load(type); instruction ins2 = INS_none; regNumber targetReg = tree->GetRegNum(); - regNumber tmpReg = targetReg; - emitAttr attr = emitActualTypeSize(type); - int offset = 0; + regNumber tmpReg = targetReg; + emitAttr attr = emitActualTypeSize(type); + int offset = 0; genConsumeAddress(tree->Addr()); @@ -6848,7 +6865,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if (size >= 2 * REGSIZE_BYTES) { - regNumber tempReg2 = REG_R21;//cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. + regNumber tempReg2 = REG_R21; // cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. 
for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, srcOffset += regSize, dstOffset += regSize) @@ -7178,8 +7195,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, - target->GetRegNum(), call->IsFastTailCall()); + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, target->GetRegNum(), + call->IsFastTailCall()); } else if (call->IsR2ROrVirtualStubRelativeIndir()) { @@ -7235,10 +7252,11 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(addr != nullptr); -// Non-virtual direct call to known addresses + // Non-virtual direct call to known addresses { genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, call->IsFastTailCall()); + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, + call->IsFastTailCall()); } } @@ -7427,7 +7445,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) { var_types loadType = TYP_UNDEF; - //NOTE for LOONGARCH: not supports the HFA. + // NOTE for LOONGARCH: not supports the HFA. assert(!varDsc->lvIsHfaRegArg()); { if (varTypeIsStruct(varDsc)) @@ -7451,13 +7469,14 @@ void CodeGen::genJmpMethod(GenTree* jmp) GetEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0); // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. - // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting it. + // Note that we cannot modify varDsc->GetRegNum() here because another basic block may not be expecting + // it. // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
regSet.AddMaskVars(genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); - //if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) + // if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) if (varDsc->GetOtherArgReg() < REG_STK) { // Restore the second register. @@ -7506,7 +7525,6 @@ void CodeGen::genJmpMethod(GenTree* jmp) firstArgVarNum = varNum; } } - } // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg, @@ -7580,7 +7598,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d { // We need to check if the value is not greater than 0x7FFFFFFF // if the upper 33 bits are zero. - //instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); + // instGen_Set_Reg_To_Imm(EA_8BYTE, REG_R21, 0xFFFFFFFF80000000LL); ssize_t imm = -1; GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); @@ -7606,19 +7624,19 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d default: { assert(desc.CheckKind() == GenIntCastDesc::CHECK_SMALL_INT_RANGE); - const int castMaxValue = desc.CheckSmallIntMax(); - const int castMinValue = desc.CheckSmallIntMin(); + const int castMaxValue = desc.CheckSmallIntMax(); + const int castMinValue = desc.CheckSmallIntMin(); instruction ins; if (castMaxValue > 2047) - {//should amend. should confirm !?!? + { // should amend. should confirm !?!? assert((castMaxValue == 32767) || (castMaxValue == 65535)); GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); ins = castMinValue == 0 ? INS_bgeu : INS_bge; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); } else - {//should amend. + { // should amend. GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); ins = castMinValue == 0 ? 
INS_bltu : INS_blt; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); @@ -7658,13 +7676,13 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) { genConsumeRegs(cast->gtGetOp1()); - emitter* emit = GetEmitter(); - var_types dstType = cast->CastToType(); - var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); - const regNumber dstReg = cast->GetRegNum(); - const unsigned char pos = 0; - const unsigned char size = 32; + emitter* emit = GetEmitter(); + var_types dstType = cast->CastToType(); + var_types srcType = genActualType(cast->gtGetOp1()->TypeGet()); + const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + const regNumber dstReg = cast->GetRegNum(); + const unsigned char pos = 0; + const unsigned char size = 32; assert(genIsValidIntReg(srcReg)); assert(genIsValidIntReg(dstReg)); @@ -7676,7 +7694,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) genIntCastOverflowCheck(cast, desc, srcReg); } - //if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE)) + // if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE)) //{ // if (dstType == TYP_INT) // { @@ -7689,7 +7707,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) // emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos); // } //} - //else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) + // else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) { instruction ins; @@ -7699,11 +7717,11 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: if (desc.ExtendSrcSize() == 1) { - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+7, pos); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 7, pos); } else { - 
emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+15, pos); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 15, pos); } break; case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: @@ -7712,7 +7730,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) break; #ifdef TARGET_64BIT case GenIntCastDesc::ZERO_EXTEND_INT: - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos); + emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos); break; case GenIntCastDesc::SIGN_EXTEND_INT: emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); @@ -7722,7 +7740,7 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) assert(desc.ExtendKind() == GenIntCastDesc::COPY); #if 1 if (srcType == TYP_INT) - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0);//should amend. + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); // should amend. else emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); #else @@ -7759,7 +7777,7 @@ void CodeGen::genFloatToFloatCast(GenTree* treeNode) assert(genIsValidFloatReg(targetReg)); GenTree* op1 = treeNode->AsOp()->gtOp1; - assert(!op1->isContained()); // Cannot be contained + assert(!op1->isContained()); // Cannot be contained assert(genIsValidFloatReg(op1->GetRegNum())); // Must be a valid float reg. var_types dstType = treeNode->CastToType(); @@ -7843,7 +7861,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, { unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); - const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber);//TODO: unused. + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); // TODO: unused. 
gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); } @@ -7908,7 +7926,7 @@ const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg) { /* TODO for LOONGARCH64: should redesign and delete. */ - assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } //------------------------------------------------------------------------ @@ -7971,7 +7989,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) #if 1 //------------------------------------------------------------------------ // genScaledAdd: A helper for genLeaInstruction. -//TODO: can amend further. +// TODO: can amend further. void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale) { emitter* emit = GetEmitter(); @@ -8209,7 +8227,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // loop: // ldx_w rTemp, sp, rOffset, // sub_d rOffset, rOffset, REG_R21 - // bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this rOffset. + // bge rOffset, rLimit, loop // If rLimit is less or equal rOffset, we need to probe this + // rOffset. 
noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int @@ -8252,7 +8271,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni } } -inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) +inline void CodeGen::genJumpToThrowHlpBlk_la( + SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) { assert(INS_beq <= ins && ins <= INS_bgeu); @@ -8295,7 +8315,7 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instructi noway_assert(excpRaisingBlock != nullptr); // Jump to the exception-throwing block on error. - emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5));//5-bits; + emit->emitIns_J(ins, excpRaisingBlock, (int)reg1 | ((int)reg2 << 5)); // 5-bits; } else { @@ -8303,52 +8323,53 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instructi // we will jump around it in the normal non-exception case. void* pAddr = nullptr; - void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); + void* addr = compiler->compGetHelperFtn((CorInfoHelpFunc)(compiler->acdHelper(codeKind)), &pAddr); emitter::EmitCallType callType; - regNumber callTarget; + regNumber callTarget; // maybe optimize // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); - if(ins == INS_blt) + if (ins == INS_blt) ins = INS_bge; - else if(ins == INS_bltu) + else if (ins == INS_bltu) ins = INS_bgeu; - else if(ins == INS_bge) + else if (ins == INS_bge) ins = INS_blt; - else if(ins == INS_bgeu) + else if (ins == INS_bgeu) ins = INS_bltu; else ins = ins == INS_beq ? 
INS_bne : INS_beq; if (addr == nullptr) { - callType = emitter::EC_INDIR_R; + callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - //ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize. + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize. - //instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); - //emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); + // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); + // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); if (compiler->opts.compReloc) { - ssize_t imm = (2 + 1) << 2;// , 1=jirl. + ssize_t imm = (2 + 1) << 2; // , 1=jirl. emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { - ssize_t imm = (3 + 1) << 2;// , 1=jirl. + ssize_t imm = (3 + 1) << 2; // , 1=jirl. emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); - //GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); - //GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000)>>12); + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, callTarget, (ssize_t)pAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff)>>2); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, + ((ssize_t)pAddr & 0xfff) >> 2); } } else - {//INS_OPTS_C - callType = emitter::EC_FUNC_TOKEN; + { // INS_OPTS_C + callType = emitter::EC_FUNC_TOKEN; 
callTarget = REG_NA; ssize_t imm = 5 << 2; @@ -8358,13 +8379,13 @@ inline void CodeGen::genJumpToThrowHlpBlk_la(SpecialCodeKind codeKind, instructi emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); } - emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), INDEBUG_LDISASM_COMMA(nullptr) addr, 0, - EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ - callTarget, /* ireg */ - REG_NA, 0, 0, /* xreg, xmul, disp */ - false /* isJump */ - ); + emit->emitIns_Call(callType, compiler->eeFindHelper(compiler->acdHelper(codeKind)), + INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ + callTarget, /* ireg */ + REG_NA, 0, 0, /* xreg, xmul, disp */ + false /* isJump */ + ); regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)(compiler->acdHelper(codeKind))); regSet.verifyRegistersUsed(killMask); @@ -8465,7 +8486,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // rsPushRegs |= RBM_RA; // We must save the return address (in the RA register). - regSet.rsMaskCalleeSaved = rsPushRegs; + regSet.rsMaskCalleeSaved = rsPushRegs; regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; @@ -8480,7 +8501,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } #endif // DEBUG - // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or requirements and + // See the document "LOONGARCH64 JIT Frame Layout" and/or "LOONGARCH64 Exception Data" for more details or + // requirements and // options. Case numbers in comments here refer to this document. 
See also Compiler::lvaAssignFrameOffsets() // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture) // for pictures of the funclet frame layouts. @@ -8640,32 +8662,22 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // daddiu fp, sp, offset-fp // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); + JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); frameType = 1; offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; -//#ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offsetSpToSavedFp & 0xf) && (offsetSpToSavedFp <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offsetSpToSavedFp >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offsetSpToSavedFp); -// } -// else -//#endif -// { GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); -// } maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - offset = compiler->compLclFrameSize + 2*REGSIZE_BYTES;//FP/RA + offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA } else { @@ -8673,8 +8685,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe offsetSpToSavedFp = genSPtoFPdelta(); - JITDUMP("Frame type 2(Top). 
#outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize, offsetSpToSavedFp); + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, + offsetSpToSavedFp); offset = compiler->compLclFrameSize; } @@ -8683,27 +8696,28 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { if (!IsSaveFpRaWithAllCalleeSavedRegisters()) { - JITDUMP("Frame type 3. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize); + JITDUMP("Frame type 3. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); frameType = 3; maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; + offset = calleeSaveSPDelta - offset; } else { frameType = 4; - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize), - totalFrameSize, compiler->compLclFrameSize, calleeSaveSPDelta); + JITDUMP("Frame type 4(Top). 
#outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, + calleeSaveSPDelta); - offset = totalFrameSize - compiler->compLclFrameSize; + offset = totalFrameSize - compiler->compLclFrameSize; calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; + offset = calleeSaveSPDelta - offset; offsetSpToSavedFp = offset + REGSIZE_BYTES; } } @@ -8726,28 +8740,28 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); - // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, - // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't - // need to add codes at all. - - //if (compiler->info.compIsVarArgs) - //{ - // JITDUMP(" compIsVarArgs=true\n"); - - // // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. - // assert((offset % 16) == 0); - // for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) - // { - // regNumber reg2 = REG_NEXT(reg1); - // // sd REG, offset(SP) - // // sd REG + 1, (offset+8)(SP) - // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); - // compiler->unwindNop(); - // GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); - // compiler->unwindNop(); - // offset += 2 * REGSIZE_BYTES; - // } - //} +// For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, +// so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't +// need to add codes at all. 
+ +// if (compiler->info.compIsVarArgs) +//{ +// JITDUMP(" compIsVarArgs=true\n"); + +// // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. +// assert((offset % 16) == 0); +// for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) +// { +// regNumber reg2 = REG_NEXT(reg1); +// // sd REG, offset(SP) +// // sd REG + 1, (offset+8)(SP) +// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); +// compiler->unwindNop(); +// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); +// compiler->unwindNop(); +// offset += 2 * REGSIZE_BYTES; +// } +//} #ifdef DEBUG if (compiler->opts.disAsm) @@ -8755,39 +8769,29 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe #endif if (frameType == 1) { - //offsetSpToSavedFp = genSPtoFPdelta(); + // offsetSpToSavedFp = genSPtoFPdelta(); } else if (frameType == 2) { - //offsetSpToSavedFp = genSPtoFPdelta(); + // offsetSpToSavedFp = genSPtoFPdelta(); } else if (frameType == 3) { if (compiler->lvaOutgoingArgSpaceSize >= 2040) { - offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + offset = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; + offset = calleeSaveSPDelta - offset; genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true); offsetSpToSavedFp = offset; -//#ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -//#endif -// { GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, 
REG_RA, REG_SPBASE, offset + 8); compiler->unwindSaveReg(REG_RA, offset + 8); -// } genEstablishFramePointer(offset, /* reportUnwindData */ true); @@ -8801,14 +8805,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe offset = compiler->lvaOutgoingArgSpaceSize; -//#ifdef OPTIMIZE_LOONGSON_EXT -// if (!(offset & 0xf) && (offset <= 0xff0)) -// { -// GetEmitter()->emitIns_R_R_R_I(INS_gssq, EA_PTRSIZE, REG_RA, REG_FP, REG_SPBASE, offset >> 4); -// compiler->unwindSaveRegPair(REG_FP, REG_RA, offset); -// } -// else -//#endif GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset); compiler->unwindSaveReg(REG_FP, offset); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 388fae2b695bb..88a5940879517 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -759,7 +759,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #elif defined(TARGET_LOONGARCH64) // Structs that are pointer sized or smaller. 
- //assert(structSize > TARGET_POINTER_SIZE); + // assert(structSize > TARGET_POINTER_SIZE); // On LOONGARCH64 structs that are 1-16 bytes are passed by value in one/multiple register(s) if (structSize <= (TARGET_POINTER_SIZE * 2)) @@ -798,7 +798,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by reference to a copy // setup wbPassType and useType indicate that this is passed using one register (by reference to a copy) @@ -948,7 +948,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_STRUCT; } } -#endif //TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 // Check for cases where a small struct is returned in a register // via a primitive type. @@ -2464,7 +2464,7 @@ void Compiler::compSetProcessor() #endif #if defined(TARGET_LOONGARCH64) - //TODO: should add LOONGARCH64's features for LOONGARCH64. +// TODO: should add LOONGARCH64's features for LOONGARCH64. #endif instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags); @@ -2652,7 +2652,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary; } #elif defined(TARGET_LOONGARCH64) - //TODO: should be adaptive on LoongArch64. +// TODO: should be adaptive on LoongArch64. #endif assert(isPow2(opts.compJitAlignLoopBoundary)); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index fde453e53d7d6..a477e24def83a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -472,11 +472,10 @@ class LclVarDsc unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. 
#elif defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. - unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. - unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. - unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH) - + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. + unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def @@ -8029,7 +8028,7 @@ class Compiler } #elif defined(TARGET_LOONGARCH64) static bool varTypeNeedsPartialCalleeSave(var_types type) - {//TODO: supporting SIMD feature for LoongArch64. + { // TODO: supporting SIMD feature for LoongArch64. return false; } #else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) @@ -8723,7 +8722,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; #elif defined(TARGET_LOONGARCH64) - //TODO: supporting SIMD feature for LoongArch64. + // TODO: supporting SIMD feature for LoongArch64. assert(!"unimplemented yet on LA"); CORINFO_InstructionSet minimumIsa = 0; #else @@ -12127,10 +12126,10 @@ const instruction INS_SQRT = INS_fsqrt; #ifdef TARGET_LOONGARCH64 const instruction INS_BREAKPOINT = INS_break; -const instruction INS_MULADD = INS_fmadd_d;// NOTE: default is double. 
-const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. -const instruction INS_SQRT = INS_fsqrt_d;// NOTE: default is double. -#endif // TARGET_LOONGARCH64 +const instruction INS_MULADD = INS_fmadd_d; // NOTE: default is double. +const instruction INS_ABS = INS_fabs_d; // NOTE: default is double. +const instruction INS_SQRT = INS_fsqrt_d; // NOTE: default is double. +#endif // TARGET_LOONGARCH64 /*****************************************************************************/ diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 23cd48bf2edb3..8e449aee28faa 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1842,10 +1842,10 @@ inline void LclVarDsc::incRefCnts(weight_t weight, Compiler* comp, RefCountState bool doubleWeight = lvIsTemp; -#if defined(TARGET_AMD64) || defined(TARGET_ARM64)|| defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // and, for the time being, implicit byref params doubleWeight |= lvIsImplicitByRef; -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (doubleWeight && (weight * 2 > weight)) { diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index b53608757f384..898746249b619 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -460,7 +460,7 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* // This struct is passed by reference using a single 'slot' return TARGET_POINTER_SIZE; } - // otherwise will we pass this struct by value in multiple registers +// otherwise will we pass this struct by value in multiple registers #elif !defined(TARGET_ARM) NYI("unknown target"); #endif // defined(TARGET_XXX) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 638e0cd5c7266..b09a3ac7483dd 100644 --- a/src/coreclr/jit/emit.cpp +++ 
b/src/coreclr/jit/emit.cpp @@ -743,7 +743,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; - // We'll allocate extra space (DWORD aligned) to record the GC regs +// We'll allocate extra space (DWORD aligned) to record the GC regs #if defined(TARGET_LOONGARCH64) gs += sizeof(regMaskTP); #else @@ -759,7 +759,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) if (ig->igFlags & IGF_BYREF_REGS) { - // Record the byref regs in front the of the instructions +// Record the byref regs in front the of the instructions #if defined(TARGET_LOONGARCH64) *castto(id, regMaskTP*)++ = emitInitByrefRegs; @@ -795,7 +795,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) } #endif - // Record how many instructions and bytes of code this group contains +// Record how many instructions and bytes of code this group contains #ifdef TARGET_LOONGARCH64 noway_assert((unsigned int)emitCurIGinsCnt == emitCurIGinsCnt); @@ -809,7 +809,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) #else ig->igInsCnt = (BYTE)emitCurIGinsCnt; #endif - ig->igSize = (unsigned short)emitCurIGsize; + ig->igSize = (unsigned short)emitCurIGsize; emitCurCodeOffset += emitCurIGsize; assert(IsCodeAligned(emitCurCodeOffset)); @@ -4157,16 +4157,19 @@ void emitter::emitJumpDistBind() int jmp_iteration = 1; #ifdef TARGET_LOONGARCH64 - //NOTE: + // NOTE: // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; - // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had updated; + // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. // small jump max. neg distance - NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; // small jump max. 
pos distance - NATIVE_OFFSET psd = B_DIST_SMALL_MAX_POS - emitCounts_INS_OPTS_J * (3 << 2);//the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). #endif /*****************************************************************************/ @@ -4330,7 +4333,7 @@ void emitter::emitJumpDistBind() #ifdef DEBUG #if defined(TARGET_LOONGARCH64) #if defined(UNALIGNED_CHECK_DISABLE) - UNALIGNED_CHECK_DISABLE; + UNALIGNED_CHECK_DISABLE; #endif assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < (jmp->idjOffs + adjLJ)); #else @@ -4374,7 +4377,7 @@ void emitter::emitJumpDistBind() #else lstIG->igOffs - adjIG #endif - ); + ); } #endif // DEBUG #if defined(TARGET_LOONGARCH64) @@ -4392,7 +4395,7 @@ void emitter::emitJumpDistBind() lstIG = jmpIG; } - /* Apply any local size adjustment to the jump's relative offset */ +/* Apply any local size adjustment to the jump's relative offset */ #if defined(TARGET_LOONGARCH64) jmp->idjOffs += adjLJ; @@ -4552,11 +4555,11 @@ void emitter::emitJumpDistBind() if (jmpIG->igNum < tgtIG->igNum) { - /* Forward jump */ +/* Forward jump */ - /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between - here and the target could be shortened, causing the actual distance to shrink. - */ +/* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. 
+ */ #if defined(TARGET_LOONGARCH64) dstOffs += adjIG; @@ -4598,7 +4601,7 @@ void emitter::emitJumpDistBind() #endif // DEBUG_EMIT #if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0);//Forward jump + assert(jmpDist >= 0); // Forward jump assert(!(jmpDist & 0x3)); if (isLinkingEnd_LA & 0x2) @@ -4610,38 +4613,39 @@ void emitter::emitJumpDistBind() instruction ins = jmp->idIns(); assert((INS_bceqz <= ins) && (ins <= INS_bl)); - if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. { - if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) { extra = 4; } else { - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! extra = 8; } } - else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. 
{ - if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) continue; extra = 4; - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); } - else //if (ins == INS_b || ins == INS_bl) + else // if (ins == INS_b || ins == INS_bl) { assert(ins == INS_b || ins == INS_bl); - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); continue; } jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). adjLJ += extra; adjIG += extra; emitTotalCodeSize += extra; @@ -4696,50 +4700,51 @@ void emitter::emitJumpDistBind() #endif // DEBUG_EMIT #if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0);//Backward jump + assert(jmpDist >= 0); // Backward jump assert(!(jmpDist & 0x3)); if (isLinkingEnd_LA & 0x2) { - jmp->idAddr()->iiaSetJmpOffset(-jmpDist);//Backward jump is negative! + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! } else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) { instruction ins = jmp->idIns(); assert((INS_bceqz <= ins) && (ins <= INS_bl)); - if (ins < INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. 
{ - if ((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000) + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) { extra = 4; } else { - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO:later will be deleted!!! + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! extra = 8; } } - else if (ins < INS_b)// beqz/bnez < b < bl // See instrsloongarch64.h. + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. { - if (jmpDist + emitCounts_INS_OPTS_J*4 < 0x200000 ) + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) continue; extra = 4; - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000); + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); } - else //if (ins == INS_b || ins == INS_bl) + else // if (ins == INS_b || ins == INS_bl) { assert(ins == INS_b || ins == INS_bl); - //assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J*4) < 0x8000000);//TODO + // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO continue; } jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra;//the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). adjLJ += extra; adjIG += extra; emitTotalCodeSize += extra; @@ -4951,18 +4956,18 @@ void emitter::emitJumpDistBind() jmpIG->igFlags |= IGF_UPD_ISZ; -#endif // not defined(TARGET_LOONGARCH64) - } // end for each jump +#endif // not defined(TARGET_LOONGARCH64) + } // end for each jump #if defined(TARGET_LOONGARCH64) if ((isLinkingEnd_LA & 0x3) < 0x2) { - //indicating had updated the instrDescJmp's size with the type INS_OPTS_J. 
+ // indicating had updated the instrDescJmp's size with the type INS_OPTS_J. isLinkingEnd_LA = 0x2; - //emitRecomputeIGoffsets(); + // emitRecomputeIGoffsets(); /* Adjust offsets of any remaining blocks */ - for (;lstIG;) + for (; lstIG;) { lstIG = lstIG->igNext; if (!lstIG) @@ -6820,7 +6825,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #ifdef TARGET_LOONGARCH64 - //cp = cp - 4; + // cp = cp - 4; unsigned actualCodeSize = cp - codeBlock; #endif @@ -6949,7 +6954,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } -#endif //!TARGET_LOONGARCH64 +#endif //! TARGET_LOONGARCH64 #ifdef DEBUG if (emitComp->opts.disAsm) @@ -8925,7 +8930,7 @@ cnsval_ssize_t emitter::emitGetInsSC(instrDesc* id) int adr = emitComp->lvaFrameAddress(varNum, &FPbased); int dsp = adr + offs; if (id->idIns() == INS_sub) - dsp = -dsp; + dsp = -dsp; #endif return dsp; } @@ -9538,7 +9543,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; #elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) - result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; + result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 66a9ccaf207ee..15b84ae2b4cec 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -303,7 +303,7 @@ struct insGroup #if EMIT_TRACK_STACK_DEPTH unsigned igStkLvl; // stack level on entry #endif - regMaskSmall igGCregs; // set of registers with live GC refs + regMaskSmall igGCregs; // set of registers with live GC refs #ifdef TARGET_LOONGARCH64 unsigned int igInsCnt; // # of instructions in this group #else @@ -611,9 +611,10 @@ class emitter static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) - //insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . 
- unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the _idInsCount. - //unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. + // insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the + // _idInsCount. + // unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -632,7 +633,7 @@ class emitter #if defined(TARGET_LOONGARCH64) insFormat idInsFmt() const - {//not used for LOONGARCH64. + { // not used for LOONGARCH64. return (insFormat)0; } void idInsFmt(insFormat insFmt) @@ -665,7 +666,7 @@ class emitter // amd64: 17 bits // arm: 16 bits // arm64: 17 bits - //loongarch64: 14 bits + // loongarch64: 14 bits private: #if defined(TARGET_XARCH) @@ -676,7 +677,7 @@ class emitter #elif defined(TARGET_ARM64) // Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields #elif defined(TARGET_LOONGARCH64) - /* _idOpSize defined bellow. */ +/* _idOpSize defined bellow. */ #else // ARM opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 #endif // ARM @@ -729,8 +730,9 @@ class emitter #ifdef TARGET_LOONGARCH64 /* TODO: for LOONGARCH: maybe delete on future. */ - opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 - insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the accessing a local on stack. + opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 + insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the + // accessing a local on stack. unsigned _idLclVar : 1; // access a local on stack. 
#endif @@ -748,10 +750,10 @@ class emitter // For Arm64, we have used 17 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (17) #elif defined(TARGET_XARCH) - // For xarch, we have used 14 bits from the second DWORD. +// For xarch, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #elif defined(TARGET_LOONGARCH64) -// For Loongarch64, we have used 14 bits from the second DWORD. + // For Loongarch64, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -763,7 +765,7 @@ class emitter // amd64: 46 bits // arm: 48 bits // arm64: 49 bits - //loongarch64: 46 bits + // loongarch64: 46 bits unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag @@ -911,14 +913,14 @@ class emitter #elif defined(TARGET_LOONGARCH64) // TARGET_XARCH struct { - unsigned int iiaEncodedInstr;//instruction's binary encoding. - regNumber _idReg3 : REGNUM_BITS; - regNumber _idReg4 : REGNUM_BITS; + unsigned int iiaEncodedInstr; // instruction's binary encoding. + regNumber _idReg3 : REGNUM_BITS; + regNumber _idReg4 : REGNUM_BITS; }; struct { - int iiaJmpOffset;//temporary saving the offset of jmp or data. + int iiaJmpOffset; // temporary saving the offset of jmp or data. emitLclVarAddr iiaLclVar; }; @@ -939,7 +941,7 @@ class emitter { return iiaJmpOffset; } -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; @@ -1043,7 +1045,7 @@ class emitter #elif defined(TARGET_LOONGARCH64) unsigned idCodeSize() const { - return _idCodeSize;//_idInsCount; + return _idCodeSize; //_idInsCount; } void idCodeSize(unsigned sz) { @@ -1053,7 +1055,7 @@ class emitter #endif // TARGET_LOONGARCH64 emitAttr idOpSize() - {//NOTE: not used for LOONGARCH64. + { // NOTE: not used for LOONGARCH64. 
return emitDecodeSize(_idOpSize); } void idOpSize(emitAttr opsz) @@ -1888,8 +1890,8 @@ class emitter #endif // !defined(HOST_64BIT) #ifdef TARGET_LOONGARCH64 - unsigned int emitCounts_INS_OPTS_J;//INS_OPTS_J -#endif // defined(TARGET_LOONGARCH64) + unsigned int emitCounts_INS_OPTS_J; // INS_OPTS_J +#endif // defined(TARGET_LOONGARCH64) size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index f7c06ad5cc7f6..e58ccb61282bb 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -29,349 +29,348 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ////These are used for loongarch64 instrs's dump. ////LA_OP_2R opcode: bit31 ~ bit10 -#define LA_2R_CLO_W 0x4 -#define LA_2R_CLZ_W 0x5 -#define LA_2R_CTO_W 0x6 -#define LA_2R_CTZ_W 0x7 -#define LA_2R_CLO_D 0x8 -#define LA_2R_CLZ_D 0x9 -#define LA_2R_CTO_D 0xa -#define LA_2R_CTZ_D 0xb -#define LA_2R_REVB_2H 0xc -#define LA_2R_REVB_4H 0xd -#define LA_2R_REVB_2W 0xe -#define LA_2R_REVB_D 0xf -#define LA_2R_REVH_2W 0x10 -#define LA_2R_REVH_D 0x11 -#define LA_2R_BITREV_4B 0x12 -#define LA_2R_BITREV_8B 0x13 -#define LA_2R_BITREV_W 0x14 -#define LA_2R_BITREV_D 0x15 -#define LA_2R_EXT_W_H 0x16 -#define LA_2R_EXT_W_B 0x17 -#define LA_2R_RDTIMEL_W 0x18 -#define LA_2R_RDTIMEH_W 0x19 -#define LA_2R_RDTIME_D 0x1a -#define LA_2R_CPUCFG 0x1b -#define LA_2R_ASRTLE_D 0x2 -#define LA_2R_ASRTGT_D 0x3 -#define LA_2R_FABS_S 0x4501 -#define LA_2R_FABS_D 0x4502 -#define LA_2R_FNEG_S 0x4505 -#define LA_2R_FNEG_D 0x4506 -#define LA_2R_FLOGB_S 0x4509 -#define LA_2R_FLOGB_D 0x450a -#define LA_2R_FCLASS_S 0x450d -#define LA_2R_FCLASS_D 0x450e -#define LA_2R_FSQRT_S 0x4511 -#define LA_2R_FSQRT_D 0x4512 -#define LA_2R_FRECIP_S 0x4515 -#define LA_2R_FRECIP_D 0x4516 -#define LA_2R_FRSQRT_S 0x4519 -#define LA_2R_FRSQRT_D 0x451a 
-#define LA_2R_FMOV_S 0x4525 -#define LA_2R_FMOV_D 0x4526 -#define LA_2R_MOVGR2FR_W 0x4529 -#define LA_2R_MOVGR2FR_D 0x452a -#define LA_2R_MOVGR2FRH_W 0x452b -#define LA_2R_MOVFR2GR_S 0x452d -#define LA_2R_MOVFR2GR_D 0x452e -#define LA_2R_MOVFRH2GR_S 0x452f -#define LA_2R_MOVGR2FCSR 0x4530 -#define LA_2R_MOVFCSR2GR 0x4532 -#define LA_2R_MOVFR2CF 0x4534 -#define LA_2R_MOVCF2FR 0x4535 -#define LA_2R_MOVGR2CF 0x4536 -#define LA_2R_MOVCF2GR 0x4537 -#define LA_2R_FCVT_S_D 0x4646 -#define LA_2R_FCVT_D_S 0x4649 -#define LA_2R_FTINTRM_W_S 0x4681 -#define LA_2R_FTINTRM_W_D 0x4682 -#define LA_2R_FTINTRM_L_S 0x4689 -#define LA_2R_FTINTRM_L_D 0x468a -#define LA_2R_FTINTRP_W_S 0x4691 -#define LA_2R_FTINTRP_W_D 0x4692 -#define LA_2R_FTINTRP_L_S 0x4699 -#define LA_2R_FTINTRP_L_D 0x469a -#define LA_2R_FTINTRZ_W_S 0x46a1 -#define LA_2R_FTINTRZ_W_D 0x46a2 -#define LA_2R_FTINTRZ_L_S 0x46a9 -#define LA_2R_FTINTRZ_L_D 0x46aa -#define LA_2R_FTINTRNE_W_S 0x46b1 -#define LA_2R_FTINTRNE_W_D 0x46b2 -#define LA_2R_FTINTRNE_L_S 0x46b9 -#define LA_2R_FTINTRNE_L_D 0x46ba -#define LA_2R_FTINT_W_S 0x46c1 -#define LA_2R_FTINT_W_D 0x46c2 -#define LA_2R_FTINT_L_S 0x46c9 -#define LA_2R_FTINT_L_D 0x46ca -#define LA_2R_FFINT_S_W 0x4744 -#define LA_2R_FFINT_S_L 0x4746 -#define LA_2R_FFINT_D_W 0x4748 -#define LA_2R_FFINT_D_L 0x474a -#define LA_2R_FRINT_S 0x4791 -#define LA_2R_FRINT_D 0x4792 -#define LA_2R_IOCSRRD_B 0x19200 -#define LA_2R_IOCSRRD_H 0x19201 -#define LA_2R_IOCSRRD_W 0x19202 -#define LA_2R_IOCSRRD_D 0x19203 -#define LA_2R_IOCSRWR_B 0x19204 -#define LA_2R_IOCSRWR_H 0x19205 -#define LA_2R_IOCSRWR_W 0x19206 -#define LA_2R_IOCSRWR_D 0x19207 +#define LA_2R_CLO_W 0x4 +#define LA_2R_CLZ_W 0x5 +#define LA_2R_CTO_W 0x6 +#define LA_2R_CTZ_W 0x7 +#define LA_2R_CLO_D 0x8 +#define LA_2R_CLZ_D 0x9 +#define LA_2R_CTO_D 0xa +#define LA_2R_CTZ_D 0xb +#define LA_2R_REVB_2H 0xc +#define LA_2R_REVB_4H 0xd +#define LA_2R_REVB_2W 0xe +#define LA_2R_REVB_D 0xf +#define LA_2R_REVH_2W 0x10 +#define LA_2R_REVH_D 0x11 
+#define LA_2R_BITREV_4B 0x12 +#define LA_2R_BITREV_8B 0x13 +#define LA_2R_BITREV_W 0x14 +#define LA_2R_BITREV_D 0x15 +#define LA_2R_EXT_W_H 0x16 +#define LA_2R_EXT_W_B 0x17 +#define LA_2R_RDTIMEL_W 0x18 +#define LA_2R_RDTIMEH_W 0x19 +#define LA_2R_RDTIME_D 0x1a +#define LA_2R_CPUCFG 0x1b +#define LA_2R_ASRTLE_D 0x2 +#define LA_2R_ASRTGT_D 0x3 +#define LA_2R_FABS_S 0x4501 +#define LA_2R_FABS_D 0x4502 +#define LA_2R_FNEG_S 0x4505 +#define LA_2R_FNEG_D 0x4506 +#define LA_2R_FLOGB_S 0x4509 +#define LA_2R_FLOGB_D 0x450a +#define LA_2R_FCLASS_S 0x450d +#define LA_2R_FCLASS_D 0x450e +#define LA_2R_FSQRT_S 0x4511 +#define LA_2R_FSQRT_D 0x4512 +#define LA_2R_FRECIP_S 0x4515 +#define LA_2R_FRECIP_D 0x4516 +#define LA_2R_FRSQRT_S 0x4519 +#define LA_2R_FRSQRT_D 0x451a +#define LA_2R_FMOV_S 0x4525 +#define LA_2R_FMOV_D 0x4526 +#define LA_2R_MOVGR2FR_W 0x4529 +#define LA_2R_MOVGR2FR_D 0x452a +#define LA_2R_MOVGR2FRH_W 0x452b +#define LA_2R_MOVFR2GR_S 0x452d +#define LA_2R_MOVFR2GR_D 0x452e +#define LA_2R_MOVFRH2GR_S 0x452f +#define LA_2R_MOVGR2FCSR 0x4530 +#define LA_2R_MOVFCSR2GR 0x4532 +#define LA_2R_MOVFR2CF 0x4534 +#define LA_2R_MOVCF2FR 0x4535 +#define LA_2R_MOVGR2CF 0x4536 +#define LA_2R_MOVCF2GR 0x4537 +#define LA_2R_FCVT_S_D 0x4646 +#define LA_2R_FCVT_D_S 0x4649 +#define LA_2R_FTINTRM_W_S 0x4681 +#define LA_2R_FTINTRM_W_D 0x4682 +#define LA_2R_FTINTRM_L_S 0x4689 +#define LA_2R_FTINTRM_L_D 0x468a +#define LA_2R_FTINTRP_W_S 0x4691 +#define LA_2R_FTINTRP_W_D 0x4692 +#define LA_2R_FTINTRP_L_S 0x4699 +#define LA_2R_FTINTRP_L_D 0x469a +#define LA_2R_FTINTRZ_W_S 0x46a1 +#define LA_2R_FTINTRZ_W_D 0x46a2 +#define LA_2R_FTINTRZ_L_S 0x46a9 +#define LA_2R_FTINTRZ_L_D 0x46aa +#define LA_2R_FTINTRNE_W_S 0x46b1 +#define LA_2R_FTINTRNE_W_D 0x46b2 +#define LA_2R_FTINTRNE_L_S 0x46b9 +#define LA_2R_FTINTRNE_L_D 0x46ba +#define LA_2R_FTINT_W_S 0x46c1 +#define LA_2R_FTINT_W_D 0x46c2 +#define LA_2R_FTINT_L_S 0x46c9 +#define LA_2R_FTINT_L_D 0x46ca +#define LA_2R_FFINT_S_W 0x4744 +#define 
LA_2R_FFINT_S_L 0x4746 +#define LA_2R_FFINT_D_W 0x4748 +#define LA_2R_FFINT_D_L 0x474a +#define LA_2R_FRINT_S 0x4791 +#define LA_2R_FRINT_D 0x4792 +#define LA_2R_IOCSRRD_B 0x19200 +#define LA_2R_IOCSRRD_H 0x19201 +#define LA_2R_IOCSRRD_W 0x19202 +#define LA_2R_IOCSRRD_D 0x19203 +#define LA_2R_IOCSRWR_B 0x19204 +#define LA_2R_IOCSRWR_H 0x19205 +#define LA_2R_IOCSRWR_W 0x19206 +#define LA_2R_IOCSRWR_D 0x19207 ////LA_OP_3R opcode: bit31 ~ bit15 -#define LA_3R_ADD_W 0x20 -#define LA_3R_ADD_D 0x21 -#define LA_3R_SUB_W 0x22 -#define LA_3R_SUB_D 0x23 -#define LA_3R_SLT 0x24 -#define LA_3R_SLTU 0x25 -#define LA_3R_MASKEQZ 0x26 -#define LA_3R_MASKNEZ 0x27 -#define LA_3R_NOR 0x28 -#define LA_3R_AND 0x29 -#define LA_3R_OR 0x2a -#define LA_3R_XOR 0x2b -#define LA_3R_ORN 0x2c -#define LA_3R_ANDN 0x2d -#define LA_3R_SLL_W 0x2e -#define LA_3R_SRL_W 0x2f -#define LA_3R_SRA_W 0x30 -#define LA_3R_SLL_D 0x31 -#define LA_3R_SRL_D 0x32 -#define LA_3R_SRA_D 0x33 -#define LA_3R_ROTR_W 0x36 -#define LA_3R_ROTR_D 0x37 -#define LA_3R_MUL_W 0x38 -#define LA_3R_MULH_W 0x39 -#define LA_3R_MULH_WU 0x3a -#define LA_3R_MUL_D 0x3b -#define LA_3R_MULH_D 0x3c -#define LA_3R_MULH_DU 0x3d -#define LA_3R_MULW_D_W 0x3e -#define LA_3R_MULW_D_WU 0x3f -#define LA_3R_DIV_W 0x40 -#define LA_3R_MOD_W 0x41 -#define LA_3R_DIV_WU 0x42 -#define LA_3R_MOD_WU 0x43 -#define LA_3R_DIV_D 0x44 -#define LA_3R_MOD_D 0x45 -#define LA_3R_DIV_DU 0x46 -#define LA_3R_MOD_DU 0x47 -#define LA_3R_CRC_W_B_W 0x48 -#define LA_3R_CRC_W_H_W 0x49 -#define LA_3R_CRC_W_W_W 0x4a -#define LA_3R_CRC_W_D_W 0x4b -#define LA_3R_CRCC_W_B_W 0x4c -#define LA_3R_CRCC_W_H_W 0x4d -#define LA_3R_CRCC_W_W_W 0x4e -#define LA_3R_CRCC_W_D_W 0x4f -#define LA_3R_FADD_S 0x201 -#define LA_3R_FADD_D 0x202 -#define LA_3R_FSUB_S 0x205 -#define LA_3R_FSUB_D 0x206 -#define LA_3R_FMUL_S 0x209 -#define LA_3R_FMUL_D 0x20a -#define LA_3R_FDIV_S 0x20d -#define LA_3R_FDIV_D 0x20e -#define LA_3R_FMAX_S 0x211 -#define LA_3R_FMAX_D 0x212 -#define LA_3R_FMIN_S 0x215 
-#define LA_3R_FMIN_D 0x216 -#define LA_3R_FMAXA_S 0x219 -#define LA_3R_FMAXA_D 0x21a -#define LA_3R_FMINA_S 0x21d -#define LA_3R_FMINA_D 0x21e -#define LA_3R_FSCALEB_S 0x221 -#define LA_3R_FSCALEB_D 0x222 -#define LA_3R_FCOPYSIGN_S 0x225 -#define LA_3R_FCOPYSIGN_D 0x226 -#define LA_3R_INVTLB 0xc91 -#define LA_3R_LDX_B 0x7000 -#define LA_3R_LDX_H 0x7008 -#define LA_3R_LDX_W 0x7010 -#define LA_3R_LDX_D 0x7018 -#define LA_3R_STX_B 0x7020 -#define LA_3R_STX_H 0x7028 -#define LA_3R_STX_W 0x7030 -#define LA_3R_STX_D 0x7038 -#define LA_3R_LDX_BU 0x7040 -#define LA_3R_LDX_HU 0x7048 -#define LA_3R_LDX_WU 0x7050 -#define LA_3R_PRELDX 0x7058 -#define LA_3R_FLDX_S 0x7060 -#define LA_3R_FLDX_D 0x7068 -#define LA_3R_FSTX_S 0x7070 -#define LA_3R_FSTX_D 0x7078 -#define LA_3R_AMSWAP_W 0x70c0 -#define LA_3R_AMSWAP_D 0x70c1 -#define LA_3R_AMADD_W 0x70c2 -#define LA_3R_AMADD_D 0x70c3 -#define LA_3R_AMAND_W 0x70c4 -#define LA_3R_AMAND_D 0x70c5 -#define LA_3R_AMOR_W 0x70c6 -#define LA_3R_AMOR_D 0x70c7 -#define LA_3R_AMXOR_W 0x70c8 -#define LA_3R_AMXOR_D 0x70c9 -#define LA_3R_AMMAX_W 0x70ca -#define LA_3R_AMMAX_D 0x70cb -#define LA_3R_AMMIN_W 0x70cc -#define LA_3R_AMMIN_D 0x70cd -#define LA_3R_AMMAX_WU 0x70ce -#define LA_3R_AMMAX_DU 0x70cf -#define LA_3R_AMMIN_WU 0x70d0 -#define LA_3R_AMMIN_DU 0x70d1 -#define LA_3R_AMSWAP_DB_W 0x70d2 -#define LA_3R_AMSWAP_DB_D 0x70d3 -#define LA_3R_AMADD_DB_W 0x70d4 -#define LA_3R_AMADD_DB_D 0x70d5 -#define LA_3R_AMAND_DB_W 0x70d6 -#define LA_3R_AMAND_DB_D 0x70d7 -#define LA_3R_AMOR_DB_W 0x70d8 -#define LA_3R_AMOR_DB_D 0x70d9 -#define LA_3R_AMXOR_DB_W 0x70da -#define LA_3R_AMXOR_DB_D 0x70db -#define LA_3R_AMMAX_DB_W 0x70dc -#define LA_3R_AMMAX_DB_D 0x70dd -#define LA_3R_AMMIN_DB_W 0x70de -#define LA_3R_AMMIN_DB_D 0x70df -#define LA_3R_AMMAX_DB_WU 0x70e0 -#define LA_3R_AMMAX_DB_DU 0x70e1 -#define LA_3R_AMMIN_DB_WU 0x70e2 -#define LA_3R_AMMIN_DB_DU 0x70e3 -#define LA_3R_FLDGT_S 0x70e8 -#define LA_3R_FLDGT_D 0x70e9 -#define LA_3R_FLDLE_S 0x70ea -#define 
LA_3R_FLDLE_D 0x70eb -#define LA_3R_FSTGT_S 0x70ec -#define LA_3R_FSTGT_D 0x70ed -#define LA_3R_FSTLE_S 0x70ee -#define LA_3R_FSTLE_D 0x70ef -#define LA_3R_LDGT_B 0x70f0 -#define LA_3R_LDGT_H 0x70f1 -#define LA_3R_LDGT_W 0x70f2 -#define LA_3R_LDGT_D 0x70f3 -#define LA_3R_LDLE_B 0x70f4 -#define LA_3R_LDLE_H 0x70f5 -#define LA_3R_LDLE_W 0x70f6 -#define LA_3R_LDLE_D 0x70f7 -#define LA_3R_STGT_B 0x70f8 -#define LA_3R_STGT_H 0x70f9 -#define LA_3R_STGT_W 0x70fa -#define LA_3R_STGT_D 0x70fb -#define LA_3R_STLE_B 0x70fc -#define LA_3R_STLE_H 0x70fd -#define LA_3R_STLE_W 0x70fe -#define LA_3R_STLE_D 0x70ff +#define LA_3R_ADD_W 0x20 +#define LA_3R_ADD_D 0x21 +#define LA_3R_SUB_W 0x22 +#define LA_3R_SUB_D 0x23 +#define LA_3R_SLT 0x24 +#define LA_3R_SLTU 0x25 +#define LA_3R_MASKEQZ 0x26 +#define LA_3R_MASKNEZ 0x27 +#define LA_3R_NOR 0x28 +#define LA_3R_AND 0x29 +#define LA_3R_OR 0x2a +#define LA_3R_XOR 0x2b +#define LA_3R_ORN 0x2c +#define LA_3R_ANDN 0x2d +#define LA_3R_SLL_W 0x2e +#define LA_3R_SRL_W 0x2f +#define LA_3R_SRA_W 0x30 +#define LA_3R_SLL_D 0x31 +#define LA_3R_SRL_D 0x32 +#define LA_3R_SRA_D 0x33 +#define LA_3R_ROTR_W 0x36 +#define LA_3R_ROTR_D 0x37 +#define LA_3R_MUL_W 0x38 +#define LA_3R_MULH_W 0x39 +#define LA_3R_MULH_WU 0x3a +#define LA_3R_MUL_D 0x3b +#define LA_3R_MULH_D 0x3c +#define LA_3R_MULH_DU 0x3d +#define LA_3R_MULW_D_W 0x3e +#define LA_3R_MULW_D_WU 0x3f +#define LA_3R_DIV_W 0x40 +#define LA_3R_MOD_W 0x41 +#define LA_3R_DIV_WU 0x42 +#define LA_3R_MOD_WU 0x43 +#define LA_3R_DIV_D 0x44 +#define LA_3R_MOD_D 0x45 +#define LA_3R_DIV_DU 0x46 +#define LA_3R_MOD_DU 0x47 +#define LA_3R_CRC_W_B_W 0x48 +#define LA_3R_CRC_W_H_W 0x49 +#define LA_3R_CRC_W_W_W 0x4a +#define LA_3R_CRC_W_D_W 0x4b +#define LA_3R_CRCC_W_B_W 0x4c +#define LA_3R_CRCC_W_H_W 0x4d +#define LA_3R_CRCC_W_W_W 0x4e +#define LA_3R_CRCC_W_D_W 0x4f +#define LA_3R_FADD_S 0x201 +#define LA_3R_FADD_D 0x202 +#define LA_3R_FSUB_S 0x205 +#define LA_3R_FSUB_D 0x206 +#define LA_3R_FMUL_S 0x209 +#define 
LA_3R_FMUL_D 0x20a +#define LA_3R_FDIV_S 0x20d +#define LA_3R_FDIV_D 0x20e +#define LA_3R_FMAX_S 0x211 +#define LA_3R_FMAX_D 0x212 +#define LA_3R_FMIN_S 0x215 +#define LA_3R_FMIN_D 0x216 +#define LA_3R_FMAXA_S 0x219 +#define LA_3R_FMAXA_D 0x21a +#define LA_3R_FMINA_S 0x21d +#define LA_3R_FMINA_D 0x21e +#define LA_3R_FSCALEB_S 0x221 +#define LA_3R_FSCALEB_D 0x222 +#define LA_3R_FCOPYSIGN_S 0x225 +#define LA_3R_FCOPYSIGN_D 0x226 +#define LA_3R_INVTLB 0xc91 +#define LA_3R_LDX_B 0x7000 +#define LA_3R_LDX_H 0x7008 +#define LA_3R_LDX_W 0x7010 +#define LA_3R_LDX_D 0x7018 +#define LA_3R_STX_B 0x7020 +#define LA_3R_STX_H 0x7028 +#define LA_3R_STX_W 0x7030 +#define LA_3R_STX_D 0x7038 +#define LA_3R_LDX_BU 0x7040 +#define LA_3R_LDX_HU 0x7048 +#define LA_3R_LDX_WU 0x7050 +#define LA_3R_PRELDX 0x7058 +#define LA_3R_FLDX_S 0x7060 +#define LA_3R_FLDX_D 0x7068 +#define LA_3R_FSTX_S 0x7070 +#define LA_3R_FSTX_D 0x7078 +#define LA_3R_AMSWAP_W 0x70c0 +#define LA_3R_AMSWAP_D 0x70c1 +#define LA_3R_AMADD_W 0x70c2 +#define LA_3R_AMADD_D 0x70c3 +#define LA_3R_AMAND_W 0x70c4 +#define LA_3R_AMAND_D 0x70c5 +#define LA_3R_AMOR_W 0x70c6 +#define LA_3R_AMOR_D 0x70c7 +#define LA_3R_AMXOR_W 0x70c8 +#define LA_3R_AMXOR_D 0x70c9 +#define LA_3R_AMMAX_W 0x70ca +#define LA_3R_AMMAX_D 0x70cb +#define LA_3R_AMMIN_W 0x70cc +#define LA_3R_AMMIN_D 0x70cd +#define LA_3R_AMMAX_WU 0x70ce +#define LA_3R_AMMAX_DU 0x70cf +#define LA_3R_AMMIN_WU 0x70d0 +#define LA_3R_AMMIN_DU 0x70d1 +#define LA_3R_AMSWAP_DB_W 0x70d2 +#define LA_3R_AMSWAP_DB_D 0x70d3 +#define LA_3R_AMADD_DB_W 0x70d4 +#define LA_3R_AMADD_DB_D 0x70d5 +#define LA_3R_AMAND_DB_W 0x70d6 +#define LA_3R_AMAND_DB_D 0x70d7 +#define LA_3R_AMOR_DB_W 0x70d8 +#define LA_3R_AMOR_DB_D 0x70d9 +#define LA_3R_AMXOR_DB_W 0x70da +#define LA_3R_AMXOR_DB_D 0x70db +#define LA_3R_AMMAX_DB_W 0x70dc +#define LA_3R_AMMAX_DB_D 0x70dd +#define LA_3R_AMMIN_DB_W 0x70de +#define LA_3R_AMMIN_DB_D 0x70df +#define LA_3R_AMMAX_DB_WU 0x70e0 +#define LA_3R_AMMAX_DB_DU 0x70e1 +#define 
LA_3R_AMMIN_DB_WU 0x70e2 +#define LA_3R_AMMIN_DB_DU 0x70e3 +#define LA_3R_FLDGT_S 0x70e8 +#define LA_3R_FLDGT_D 0x70e9 +#define LA_3R_FLDLE_S 0x70ea +#define LA_3R_FLDLE_D 0x70eb +#define LA_3R_FSTGT_S 0x70ec +#define LA_3R_FSTGT_D 0x70ed +#define LA_3R_FSTLE_S 0x70ee +#define LA_3R_FSTLE_D 0x70ef +#define LA_3R_LDGT_B 0x70f0 +#define LA_3R_LDGT_H 0x70f1 +#define LA_3R_LDGT_W 0x70f2 +#define LA_3R_LDGT_D 0x70f3 +#define LA_3R_LDLE_B 0x70f4 +#define LA_3R_LDLE_H 0x70f5 +#define LA_3R_LDLE_W 0x70f6 +#define LA_3R_LDLE_D 0x70f7 +#define LA_3R_STGT_B 0x70f8 +#define LA_3R_STGT_H 0x70f9 +#define LA_3R_STGT_W 0x70fa +#define LA_3R_STGT_D 0x70fb +#define LA_3R_STLE_B 0x70fc +#define LA_3R_STLE_H 0x70fd +#define LA_3R_STLE_W 0x70fe +#define LA_3R_STLE_D 0x70ff ////LA_OP_4R opcode: bit31 ~ bit20 -#define LA_4R_FMADD_S 0x81 -#define LA_4R_FMADD_D 0x82 -#define LA_4R_FMSUB_S 0x85 -#define LA_4R_FMSUB_D 0x86 -#define LA_4R_FNMADD_S 0x89 -#define LA_4R_FNMADD_D 0x8a -#define LA_4R_FNMSUB_S 0x8d -#define LA_4R_FNMSUB_D 0x8e -#define LA_4R_FSEL 0xd0 +#define LA_4R_FMADD_S 0x81 +#define LA_4R_FMADD_D 0x82 +#define LA_4R_FMSUB_S 0x85 +#define LA_4R_FMSUB_D 0x86 +#define LA_4R_FNMADD_S 0x89 +#define LA_4R_FNMADD_D 0x8a +#define LA_4R_FNMSUB_S 0x8d +#define LA_4R_FNMSUB_D 0x8e +#define LA_4R_FSEL 0xd0 ////LA_OP_2RI8 ////LA_OP_2RI12 opcode: bit31 ~ bit22 -#define LA_2RI12_SLTI 0x8 -#define LA_2RI12_SLTUI 0x9 -#define LA_2RI12_ADDI_W 0xa -#define LA_2RI12_ADDI_D 0xb -#define LA_2RI12_LU52I_D 0xc -#define LA_2RI12_ANDI 0xd -#define LA_2RI12_ORI 0xe -#define LA_2RI12_XORI 0xf -#define LA_2RI12_CACHE 0x18 -#define LA_2RI12_LD_B 0xa0 -#define LA_2RI12_LD_H 0xa1 -#define LA_2RI12_LD_W 0xa2 -#define LA_2RI12_LD_D 0xa3 -#define LA_2RI12_ST_B 0xa4 -#define LA_2RI12_ST_H 0xa5 -#define LA_2RI12_ST_W 0xa6 -#define LA_2RI12_ST_D 0xa7 -#define LA_2RI12_LD_BU 0xa8 -#define LA_2RI12_LD_HU 0xa9 -#define LA_2RI12_LD_WU 0xaa -#define LA_2RI12_PRELD 0xab -#define LA_2RI12_FLD_S 0xac -#define 
LA_2RI12_FST_S 0xad -#define LA_2RI12_FLD_D 0xae -#define LA_2RI12_FST_D 0xaf +#define LA_2RI12_SLTI 0x8 +#define LA_2RI12_SLTUI 0x9 +#define LA_2RI12_ADDI_W 0xa +#define LA_2RI12_ADDI_D 0xb +#define LA_2RI12_LU52I_D 0xc +#define LA_2RI12_ANDI 0xd +#define LA_2RI12_ORI 0xe +#define LA_2RI12_XORI 0xf +#define LA_2RI12_CACHE 0x18 +#define LA_2RI12_LD_B 0xa0 +#define LA_2RI12_LD_H 0xa1 +#define LA_2RI12_LD_W 0xa2 +#define LA_2RI12_LD_D 0xa3 +#define LA_2RI12_ST_B 0xa4 +#define LA_2RI12_ST_H 0xa5 +#define LA_2RI12_ST_W 0xa6 +#define LA_2RI12_ST_D 0xa7 +#define LA_2RI12_LD_BU 0xa8 +#define LA_2RI12_LD_HU 0xa9 +#define LA_2RI12_LD_WU 0xaa +#define LA_2RI12_PRELD 0xab +#define LA_2RI12_FLD_S 0xac +#define LA_2RI12_FST_S 0xad +#define LA_2RI12_FLD_D 0xae +#define LA_2RI12_FST_D 0xaf ////LA_OP_2RI14i opcode: bit31 ~ bit24 -#define LA_2RI14_LL_W 0x20 -#define LA_2RI14_SC_W 0x21 -#define LA_2RI14_LL_D 0x22 -#define LA_2RI14_SC_D 0x23 -#define LA_2RI14_LDPTR_W 0x24 -#define LA_2RI14_STPTR_W 0x25 -#define LA_2RI14_LDPTR_D 0x26 -#define LA_2RI14_STPTR_D 0x27 +#define LA_2RI14_LL_W 0x20 +#define LA_2RI14_SC_W 0x21 +#define LA_2RI14_LL_D 0x22 +#define LA_2RI14_SC_D 0x23 +#define LA_2RI14_LDPTR_W 0x24 +#define LA_2RI14_STPTR_W 0x25 +#define LA_2RI14_LDPTR_D 0x26 +#define LA_2RI14_STPTR_D 0x27 ////LA_OP_2RI16 opcode: bit31 ~ bit26 -#define LA_2RI16_ADDU16I_D 0x4 -#define LA_2RI16_JIRL 0x13 -#define LA_2RI16_BEQ 0x16 -#define LA_2RI16_BNE 0x17 -#define LA_2RI16_BLT 0x18 -#define LA_2RI16_BGE 0x19 -#define LA_2RI16_BLTU 0x1a -#define LA_2RI16_BGEU 0x1b +#define LA_2RI16_ADDU16I_D 0x4 +#define LA_2RI16_JIRL 0x13 +#define LA_2RI16_BEQ 0x16 +#define LA_2RI16_BNE 0x17 +#define LA_2RI16_BLT 0x18 +#define LA_2RI16_BGE 0x19 +#define LA_2RI16_BLTU 0x1a +#define LA_2RI16_BGEU 0x1b ////LA_OP_1RI20 opcode: bit31 ~ bit25 -#define LA_1RI20_LU12I_W 0xa -#define LA_1RI20_LU32I_D 0xb -#define LA_1RI20_PCADDI 0xc -#define LA_1RI20_PCALAU12I 0xd -#define LA_1RI20_PCADDU12I 0xe -#define 
LA_1RI20_PCADDU18I 0xf +#define LA_1RI20_LU12I_W 0xa +#define LA_1RI20_LU32I_D 0xb +#define LA_1RI20_PCADDI 0xc +#define LA_1RI20_PCALAU12I 0xd +#define LA_1RI20_PCADDU12I 0xe +#define LA_1RI20_PCADDU18I 0xf ////LA_OP_I26 -#define LA_I26_B 0x14 -#define LA_I26_BL 0x15 +#define LA_I26_B 0x14 +#define LA_I26_BL 0x15 ////LA_OP_1RI21 -#define LA_1RI21_BEQZ 0x10 -#define LA_1RI21_BNEZ 0x11 -#define LA_1RI21_BCEQZ 0x12 -#define LA_1RI21_BCNEZ 0x12 +#define LA_1RI21_BEQZ 0x10 +#define LA_1RI21_BNEZ 0x11 +#define LA_1RI21_BCEQZ 0x12 +#define LA_1RI21_BCNEZ 0x12 ////other -#define LA_OP_ALSL_W 0x1 -#define LA_OP_ALSL_WU 0x1 -#define LA_OP_ALSL_D 0xb -#define LA_OP_BYTEPICK_W 0x2 -#define LA_OP_BYTEPICK_D 0x3 -#define LA_OP_BREAK 0x54 -#define LA_OP_DBGCALL 0x55 -#define LA_OP_SYSCALL 0x56 -#define LA_OP_SLLI_W 0x10 -#define LA_OP_SLLI_D 0x10 -#define LA_OP_SRLI_W 0x11 -#define LA_OP_SRLI_D 0x11 -#define LA_OP_SRAI_W 0x12 -#define LA_OP_SRAI_D 0x12 -#define LA_OP_ROTRI_W 0x13 -#define LA_OP_ROTRI_D 0x13 -#define LA_OP_FCMP_cond_S 0xc1 -#define LA_OP_FCMP_cond_D 0xc2 -#define LA_OP_BSTRINS_W 0x1 -#define LA_OP_BSTRPICK_W 0x1 -#define LA_OP_BSTRINS_D 0x2 -#define LA_OP_BSTRPICK_D 0x3 -#define LA_OP_DBAR 0x70e4 -#define LA_OP_IBAR 0x70e5 +#define LA_OP_ALSL_W 0x1 +#define LA_OP_ALSL_WU 0x1 +#define LA_OP_ALSL_D 0xb +#define LA_OP_BYTEPICK_W 0x2 +#define LA_OP_BYTEPICK_D 0x3 +#define LA_OP_BREAK 0x54 +#define LA_OP_DBGCALL 0x55 +#define LA_OP_SYSCALL 0x56 +#define LA_OP_SLLI_W 0x10 +#define LA_OP_SLLI_D 0x10 +#define LA_OP_SRLI_W 0x11 +#define LA_OP_SRLI_D 0x11 +#define LA_OP_SRAI_W 0x12 +#define LA_OP_SRAI_D 0x12 +#define LA_OP_ROTRI_W 0x13 +#define LA_OP_ROTRI_D 0x13 +#define LA_OP_FCMP_cond_S 0xc1 +#define LA_OP_FCMP_cond_D 0xc2 +#define LA_OP_BSTRINS_W 0x1 +#define LA_OP_BSTRPICK_W 0x1 +#define LA_OP_BSTRINS_D 0x2 +#define LA_OP_BSTRPICK_D 0x3 +#define LA_OP_DBAR 0x70e4 +#define LA_OP_IBAR 0x70e5 //// add other define-macro here. 
- /*****************************************************************************/ const instruction emitJumpKindInstructions[] = { @@ -392,59 +391,55 @@ const emitJumpKind emitReverseJumpKinds[] = { * The macro define for instructions. */ -#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ - op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ - op0_code |= ((op3_imm) & 0xfff)<<10 +#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((op3_imm)&0xfff) << 10 -#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg));/* rd */ \ - op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ - op0_code |= ((code_t)(op3_reg))<<10 /* rk */ +#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ -#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg));/* rd */ \ - op0_code |= ((code_t)(op2_reg))<<5;/* rj */ \ - op0_code |= ((code_t)(op3_reg))<<10 /* rk */ +#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ -#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg))<<5; /* rj */ \ - op0_code |= ((op3_imm) & 0xffff)<<10 /* offs */ \ +#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ + op0_code |= ((op3_imm)&0xffff) << 10 /* offs */ -#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= 
((op2_imm) & 0xfffff)<<5 /* si20 */ +#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ + op0_code |= ((code_t)(op1_reg)); /* rd */ \ + op0_code |= ((op2_imm)&0xfffff) << 5 /* si20 */ -#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) \ - D_INST_lu12i_w(op0_code, op1_reg, op2_imm) +#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) D_INST_lu12i_w(op0_code, op1_reg, op2_imm) -#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) \ - D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) +#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) -#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) \ - D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) +#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) -//Load or Store instructions. -#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) \ - D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) +// Load or Store instructions. +#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) -#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ - op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ - assert(!((code_t)(op3_imm) & 0x3)); \ - op0_code |= (((code_t)(op3_imm)<<8) & 0x3fffc00) /* offset */ +#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ + op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ + assert(!((code_t)(op3_imm)&0x3)); \ + op0_code |= (((code_t)(op3_imm) << 8) & 0x3fffc00) /* offset */ -#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ - assert(!((code_t)(op1_imm) & 0x3)); \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */)<<5; /* rj */ \ - op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00); \ - op0_code |= (((code_t)(op1_imm)>>18) & 0x1f) /* offset */ +#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ + 
assert(!((code_t)(op1_imm)&0x3)); \ + op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ + op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00); \ + op0_code |= (((code_t)(op1_imm) >> 18) & 0x1f) /* offset */ -#define D_INST_B(op0_code, op1_imm) \ - assert(!((code_t)(op1_imm) & 0x3)); \ - op0_code |= (((code_t)(op1_imm)>>18) & 0x3ff); \ - op0_code |= (((code_t)(op1_imm)<<8) & 0x3fffc00) /* offset */ +#define D_INST_B(op0_code, op1_imm) \ + assert(!((code_t)(op1_imm)&0x3)); \ + op0_code |= (((code_t)(op1_imm) >> 18) & 0x3ff); \ + op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00) /* offset */ /***************************************************************************** * Look up the instruction for a jump kind @@ -463,7 +458,7 @@ const emitJumpKind emitReverseJumpKinds[] = { /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); return EJ_NONE; #if 0 for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) @@ -520,7 +515,7 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) assert(!id->idIsLargeCns()); return sizeof(instrDesc); } - //break; + // break; case INS_OPTS_I: case INS_OPTS_RC: @@ -567,7 +562,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_FUNCS_6C: case IF_FUNCS_6D: case IF_FUNCS_11: - //case IF_LA: + // case IF_LA: break; default: @@ -582,7 +577,7 @@ inline bool emitter::emitInsMayWriteToGCReg(instruction ins) { assert(ins != INS_invalid); ////NOTE: please reference the file "instrsloongarch64.h" for details !!! - return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; + return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; } bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) @@ -598,27 +593,27 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) switch (ins) { case INS_st_d: + case INS_st_w: + case INS_st_b: + case INS_st_h: case INS_stptr_d: -/////// not used these instrs right now !!! 
- //case INS_sc_d: - //case INS_stx_d: -//#ifdef DEBUG -// case INS_st_b: -// case INS_st_h: -// case INS_st_w: -// case INS_stx_b: -// case INS_stx_h: -// case INS_stx_w: -// //case INS_sc_w: -// //case INS_stgt_b: -// //case INS_stgt_h: -// //case INS_stgt_w: -// //case INS_stgt_d: -// //case INS_stle_b: -// //case INS_stle_h: -// //case INS_stle_w: -// //case INS_stle_d: -//#endif + case INS_stx_d: + case INS_stx_w: + case INS_stx_b: + case INS_stx_h: + // case INS_sc_d: + // case INS_sc_w: + //// not used these instrs right now !!! + //#ifdef DEBUG + // case INS_stgt_b: + // case INS_stgt_h: + // case INS_stgt_w: + // case INS_stgt_d: + // case INS_stle_b: + // case INS_stle_h: + // case INS_stle_w: + // case INS_stle_d: + //#endif return true; default: return false; @@ -662,7 +657,7 @@ bool emitter::emitInsIsLoad(instruction ins) } //------------------------------------------------------------------------ -//emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// emitInsIsStore: Returns true if the instruction is some kind of store instruction. // bool emitter::emitInsIsStore(instruction ins) { @@ -674,7 +669,7 @@ bool emitter::emitInsIsStore(instruction ins) } //------------------------------------------------------------------------- -//emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. +// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load/store instruction. 
// bool emitter::emitInsIsLoadOrStore(instruction ins) { @@ -695,7 +690,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins) inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) { - code_t code = BAD_CODE; + code_t code = BAD_CODE; // clang-format off const static code_t insCode[] = @@ -719,14 +714,14 @@ inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) void emitter::emitIns(instruction ins) { - //instrDesc* id = emitNewInstrSmall(EA_8BYTE); + // instrDesc* id = emitNewInstrSmall(EA_8BYTE); instrDesc* id = emitNewInstr(EA_8BYTE); id->idIns(ins); id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -737,10 +732,10 @@ void emitter::emitIns(instruction ins) */ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - //assert(offs >= 0); + // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. #ifdef DEBUG switch (ins) @@ -749,10 +744,10 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va case INS_st_h: case INS_st_w: case INS_fst_s: - //case INS_swl: - //case INS_swr: - //case INS_sdl: - //case INS_sdr: + // case INS_swl: + // case INS_swr: + // case INS_sdl: + // case INS_sdr: case INS_st_d: case INS_fst_d: break; @@ -769,15 +764,15 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va bool FPbased; base = emitComp->lvaFrameAddress(varx, &FPbased); - imm = offs < 0 ? -offs -8: base + offs; + imm = offs < 0 ? -offs - 8 : base + offs; regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = offs < 0 ? REG_R21 : reg2; - offs = offs < 0 ? -offs -8: offs; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? 
-offs - 8 : offs; if ((-2048 <= imm) && (imm < 2048)) { - //regs[1] = reg2; + // regs[1] = reg2; } else { @@ -789,7 +784,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); imm2 = imm2 & 0x7ff; - imm = imm3 ? imm2 - imm3 : imm2; + imm = imm3 ? imm2 - imm3 : imm2; reg2 = REG_RA; } @@ -810,16 +805,16 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va id->idSetIsLclVar(); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - //assert(offs >= 0); + // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr);//it's better confirm attr with ins. + emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. #ifdef DEBUG switch (ins) @@ -837,12 +832,12 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_ld_d: case INS_fld_d: - //case INS_lwl: - //case INS_lwr: + // case INS_lwl: + // case INS_lwr: - //case INS_ldl: - //case INS_ldr: - //assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); + // case INS_ldl: + // case INS_ldr: + // assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); break; case INS_lea: @@ -861,11 +856,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va bool FPbased; base = emitComp->lvaFrameAddress(varx, &FPbased); - imm = offs < 0 ? -offs -8: base + offs; + imm = offs < 0 ? -offs - 8 : base + offs; regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - reg2 = offs < 0 ? REG_R21 : reg2; - offs = offs < 0 ? -offs -8: offs; + reg2 = offs < 0 ? REG_R21 : reg2; + offs = offs < 0 ? 
-offs - 8 : offs; reg1 = (regNumber)((char)reg1 & 0x1f); code_t code; @@ -887,7 +882,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va ssize_t imm2 = imm & 0xfff; emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_RA, REG_RA, imm2); - ins = INS_add_d; + ins = INS_add_d; code = emitInsCode(ins); D_INST_add_d(code, reg1, reg2, REG_RA); } @@ -902,15 +897,15 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va imm2 = imm2 & 0x7ff; code = emitInsCode(ins); - D_INST_2RI12(code, reg1/* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); + D_INST_2RI12(code, reg1 /* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); } - //reg2 = REG_RA; + // reg2 = REG_RA; } instrDesc* id = emitNewInstr(attr); id->idReg1(reg1); - //id->idReg2(reg2);//not used. + // id->idReg2(reg2);//not used. id->idIns(ins); @@ -919,7 +914,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va id->idSetIsLclVar(); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -937,13 +932,13 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) case INS_b: case INS_bl: assert(!(imm & 0x3)); - code |= ((imm>>18) & 0x3ff); //offs[25:16] - code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + code |= ((imm >> 18) & 0x3ff); // offs[25:16] + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] break; case INS_dbar: case INS_ibar: assert((0 <= imm) && (imm <= 0x7fff)); - code |= (imm & 0x7fff); //hint + code |= (imm & 0x7fff); // hint break; default: unreached(); @@ -955,7 +950,7 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -967,8 +962,8 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of case INS_bceqz: case INS_bcnez: break; - //case INS_: - //case INS_: + // case INS_: + // case INS_: // break; default: @@ -980,9 +975,9 @@ void 
emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of assert(!(offs & 0x3)); assert(!(cc >> 3)); - code |= ((cc & 0x7) << 5); //cj - code |= ((offs >> 18) & 0x1f); //offs[20:16] - code |= ((offs >> 2) & 0xffff)<<10;//offs[15:0] + code |= ((cc & 0x7) << 5); // cj + code |= ((offs >> 18) & 0x1f); // offs[20:16] + code |= ((offs >> 2) & 0xffff) << 10; // offs[15:0] instrDesc* id = emitNewInstr(attr); @@ -990,7 +985,7 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1001,7 +996,7 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 code_t code = emitInsCode(ins); @@ -1046,7 +1041,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) { code_t code = emitInsCode(ins); -//#ifdef DEBUG + //#ifdef DEBUG switch (ins) { case INS_lu12i_w: @@ -1058,52 +1053,52 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t assert(isGeneralRegister(reg)); assert((-524288 <= imm) && (imm < 524288)); - code |= reg; //rd - code |= (imm & 0xfffff)<<5;//si20 + code |= reg; // rd + code |= (imm & 0xfffff) << 5; // si20 break; case INS_beqz: case INS_bnez: assert(isGeneralRegisterOrR0(reg)); assert(!(imm & 0x3)); - assert((-1048576 <= (imm>>2)) && ((imm>>2) <= 1048575)); + assert((-1048576 <= (imm >> 2)) && ((imm >> 2) <= 1048575)); - code |= ((imm>>18) & 0x1f); //offs[20:16] - code |= reg << 5; //rj - code |= ((imm>>2) & 0xffff)<<10;//offs[15:0] + code |= ((imm >> 18) & 0x1f); // offs[20:16] + code |= reg << 5; // rj + code |= ((imm >> 2) & 0xffff) << 10; // offs[15:0] break; case 
INS_movfr2cf: assert(isFloatReg(reg)); assert((0 <= imm) && (imm <= 7)); - code |= (reg & 0x1f)<<5;//fj - code |= imm /*& 0x7*/; //cc + code |= (reg & 0x1f) << 5; // fj + code |= imm /*& 0x7*/; // cc break; case INS_movcf2fr: assert(isFloatReg(reg)); assert((0 <= imm) && (imm <= 7)); - code |= (reg & 0x1f);//fd - code |= (imm /*& 0x7*/)<<5; //cc + code |= (reg & 0x1f); // fd + code |= (imm /*& 0x7*/) << 5; // cc break; case INS_movgr2cf: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg<<5;//rj - code |= imm /*& 0x7*/; //cc + code |= reg << 5; // rj + code |= imm /*& 0x7*/; // cc break; case INS_movcf2gr: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg;//rd - code |= (imm /*& 0x7*/)<<5; //cc + code |= reg; // rd + code |= (imm /*& 0x7*/) << 5; // cc break; default: unreached(); break; } // end switch (ins) -//#endif + //#endif instrDesc* id = emitNewInstr(attr); @@ -1112,11 +1107,11 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } -//NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. +// NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. // But I don't konw how to change it so that it can be used on LA. // I just add a statement "assert(!"unimplemented on LOONGARCH yet");". //------------------------------------------------------------------------ @@ -1132,7 +1127,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t // void emitter::emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{//TODO: should amend for LoongArch64/LOONGARCH64. +{ // TODO: should amend for LoongArch64/LOONGARCH64. 
assert(IsMovInstruction(ins)); if (!canSkip || (dstReg != srcReg)) @@ -1149,134 +1144,140 @@ void emitter::emitIns_R_R( { code_t code = emitInsCode(ins); - if (INS_mov == ins) { + if (INS_mov == ins) + { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - code |= reg1; //rd - code |= reg2<<5; //rj + code |= reg1; // rd + code |= reg2 << 5; // rj } - else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) { - //case INS_ext_w_b: - //case INS_ext_w_h: - //case INS_clo_w: - //case INS_clz_w: - //case INS_cto_w: - //case INS_ctz_w: - //case INS_clo_d: - //case INS_clz_d: - //case INS_cto_d: - //case INS_ctz_d: - //case INS_revb_2h: - //case INS_revb_4h: - //case INS_revb_2w: - //case INS_revb_d: - //case INS_revh_2w: - //case INS_revh_d: - //case INS_bitrev_4b: - //case INS_bitrev_8b: - //case INS_bitrev_w: - //case INS_bitrev_d: - //case INS_rdtimel_w: - //case INS_rdtimeh_w: - //case INS_rdtime_d: - //case INS_cpucfg: + else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) + { + // case INS_ext_w_b: + // case INS_ext_w_h: + // case INS_clo_w: + // case INS_clz_w: + // case INS_cto_w: + // case INS_ctz_w: + // case INS_clo_d: + // case INS_clz_d: + // case INS_cto_d: + // case INS_ctz_d: + // case INS_revb_2h: + // case INS_revb_4h: + // case INS_revb_2w: + // case INS_revb_d: + // case INS_revh_2w: + // case INS_revh_d: + // case INS_bitrev_4b: + // case INS_bitrev_8b: + // case INS_bitrev_w: + // case INS_bitrev_d: + // case INS_rdtimel_w: + // case INS_rdtimeh_w: + // case INS_rdtime_d: + // case INS_cpucfg: assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - code |= reg1; //rd - code |= reg2 << 5;//rj + code |= reg1; // rd + code |= reg2 << 5; // rj } - else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) { - //case INS_asrtle_d: - //case INS_asrtgt_d: + else if ((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) + { + // case INS_asrtle_d: + // case INS_asrtgt_d: assert(isGeneralRegisterOrR0(reg1)); 
assert(isGeneralRegisterOrR0(reg2)); - code |= reg1 << 5; //rj - code |= reg2 << 10; //rk + code |= reg1 << 5; // rj + code |= reg2 << 10; // rk } - else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) { - //case INS_fabs_s: - //case INS_fabs_d: - //case INS_fneg_s: - //case INS_fneg_d: - //case INS_fsqrt_s: - //case INS_fsqrt_d: - //case INS_frsqrt_s: - //case INS_frsqrt_d: - //case INS_frecip_s: - //case INS_frecip_d: - //case INS_flogb_s: - //case INS_flogb_d: - //case INS_fclass_s: - //case INS_fclass_d: - //case INS_fcvt_s_d: - //case INS_fcvt_d_s: - //case INS_ffint_s_w: - //case INS_ffint_s_l: - //case INS_ffint_d_w: - //case INS_ffint_d_l: - //case INS_ftint_w_s: - //case INS_ftint_w_d: - //case INS_ftint_l_s: - //case INS_ftint_l_d: - //case INS_ftintrm_w_s: - //case INS_ftintrm_w_d: - //case INS_ftintrm_l_s: - //case INS_ftintrm_l_d: - //case INS_ftintrp_w_s: - //case INS_ftintrp_w_d: - //case INS_ftintrp_l_s: - //case INS_ftintrp_l_d: - //case INS_ftintrz_w_s: - //case INS_ftintrz_w_d: - //case INS_ftintrz_l_s: - //case INS_ftintrz_l_d: - //case INS_ftintrne_w_s: - //case INS_ftintrne_w_d: - //case INS_ftintrne_l_s: - //case INS_ftintrne_l_d: - //case INS_frint_s: - //case INS_frint_d: - //case INS_fmov_s: - //case INS_fmov_d: + else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) + { + // case INS_fabs_s: + // case INS_fabs_d: + // case INS_fneg_s: + // case INS_fneg_d: + // case INS_fsqrt_s: + // case INS_fsqrt_d: + // case INS_frsqrt_s: + // case INS_frsqrt_d: + // case INS_frecip_s: + // case INS_frecip_d: + // case INS_flogb_s: + // case INS_flogb_d: + // case INS_fclass_s: + // case INS_fclass_d: + // case INS_fcvt_s_d: + // case INS_fcvt_d_s: + // case INS_ffint_s_w: + // case INS_ffint_s_l: + // case INS_ffint_d_w: + // case INS_ffint_d_l: + // case INS_ftint_w_s: + // case INS_ftint_w_d: + // case INS_ftint_l_s: + // case INS_ftint_l_d: + // case INS_ftintrm_w_s: + // case INS_ftintrm_w_d: + // case INS_ftintrm_l_s: + // case INS_ftintrm_l_d: + 
// case INS_ftintrp_w_s: + // case INS_ftintrp_w_d: + // case INS_ftintrp_l_s: + // case INS_ftintrp_l_d: + // case INS_ftintrz_w_s: + // case INS_ftintrz_w_d: + // case INS_ftintrz_l_s: + // case INS_ftintrz_l_d: + // case INS_ftintrne_w_s: + // case INS_ftintrne_w_d: + // case INS_ftintrne_l_s: + // case INS_ftintrne_l_d: + // case INS_frint_s: + // case INS_frint_d: + // case INS_fmov_s: + // case INS_fmov_d: assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); - code |= (reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj } - else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) { - //case INS_movgr2fr_w: - //case INS_movgr2fr_d: - //case INS_movgr2frh_w: + else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) + { + // case INS_movgr2fr_w: + // case INS_movgr2fr_d: + // case INS_movgr2frh_w: assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); - code |= (reg1 & 0x1f); //fd - code |= reg2 << 5; //rj + code |= (reg1 & 0x1f); // fd + code |= reg2 << 5; // rj } - else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) { - //case INS_movfr2gr_s: - //case INS_movfr2gr_d: - //case INS_movfrh2gr_s: + else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) + { + // case INS_movfr2gr_s: + // case INS_movfr2gr_d: + // case INS_movfrh2gr_s: assert(isGeneralRegisterOrR0(reg1)); assert(isFloatReg(reg2)); - code |= reg1; //rd - code |= (reg2 & 0x1f)<<5; //fj + code |= reg1; // rd + code |= (reg2 & 0x1f) << 5; // fj } else if ((INS_dneg == ins) || (INS_neg == ins)) { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - //sub_d rd, zero, rk - //sub_w rd, zero, rk - code |= reg1; //rd - code |= reg2 << 10; //rk + // sub_d rd, zero, rk + // sub_w rd, zero, rk + code |= reg1; // rd + code |= reg2 << 10; // rk } else if (INS_not == ins) { assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); - //nor rd, rj, zero - code |= reg1; //rd - 
code |= reg2 << 5; //rj + // nor rd, rj, zero + code |= reg1; // rd + code |= reg2 << 5; // rj } else { @@ -1291,14 +1292,14 @@ void emitter::emitIns_R_R( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } void emitter::emitIns_R_I_I( instruction ins, emitAttr attr, regNumber reg, ssize_t hint, ssize_t off, insOpts opt /* = INS_OPTS_NONE */) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 #ifdef DEBUG switch (ins) @@ -1341,182 +1342,191 @@ void emitter::emitIns_R_R_I( { code_t code = emitInsCode(ins); - if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) { - //INS_slli_w - //INS_srli_w - //INS_srai_w - //INS_rotri_w + if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) + { + // INS_slli_w + // INS_srli_w + // INS_srai_w + // INS_rotri_w assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x1f)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0x1f)<<10;//ui5 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x1f) << 10; // ui5 } - else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) { - //INS_slli_d - //INS_srli_d - //INS_srai_d - //INS_rotri_d + else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) + { + // INS_slli_d + // INS_srli_d + // INS_srai_d + // INS_rotri_d assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x3f)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0x3f)<<10;//ui6 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3f) << 10; // ui6 } - else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { + else if (((INS_addi_w <= ins) && (ins <= INS_xori)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { #ifdef DEBUG assert(isGeneralRegister(reg1)); 
assert(isGeneralRegisterOrR0(reg2)); - if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { - //case INS_addi_w: - //case INS_addi_d: - //case INS_lu52i_d: - //case INS_slti: - //case INS_ld_b: - //case INS_ld_h: - //case INS_ld_w: - //case INS_ld_d: - //case INS_ld_bu: - //case INS_ld_hu: - //case INS_ld_wu: - //case INS_st_b: - //case INS_st_h: - //case INS_st_w: - //case INS_st_d: + if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || + ((INS_st_b <= ins) && (ins <= INS_st_d))) + { + // case INS_addi_w: + // case INS_addi_d: + // case INS_lu52i_d: + // case INS_slti: + // case INS_ld_b: + // case INS_ld_h: + // case INS_ld_w: + // case INS_ld_d: + // case INS_ld_bu: + // case INS_ld_hu: + // case INS_ld_wu: + // case INS_st_b: + // case INS_st_h: + // case INS_st_w: + // case INS_st_d: assert((-2048 <= imm) && (imm <= 2047)); } else if (ins == INS_sltui) { - //case INS_sltui: + // case INS_sltui: assert((0 <= imm) && (imm <= 0x7ff)); } else { - //case INS_andi: - //case INS_ori: - //case INS_xori: + // case INS_andi: + // case INS_ori: + // case INS_xori: assert((0 <= imm) && (imm <= 0xfff)); } #endif - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0xfff)<<10;//si12 or ui12 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 or ui12 } - else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) { - //INS_fld_s - //INS_fld_d - //INS_fst_s - //INS_fst_d + else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) + { + // INS_fld_s + // INS_fld_d + // INS_fst_s + // INS_fst_d assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-2048 <= imm) && (imm <= 2047)); - code |= reg1 & 0x1f; //fd - code |= reg2 << 5; //rj - code |= (imm & 0xfff)<<10;//si12 + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= (imm & 0xfff) << 10; // si12 } - else if (((INS_ll_d >= ins) && (ins >= 
INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) { - //INS_ldptr_w - //INS_ldptr_d - //INS_ll_w - //INS_ll_d - - //INS_stptr_w - //INS_stptr_d - //INS_sc_w - //INS_sc_d + else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) + { + // INS_ldptr_w + // INS_ldptr_d + // INS_ll_w + // INS_ll_d + + // INS_stptr_w + // INS_stptr_d + // INS_sc_w + // INS_sc_d assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-8192 <= imm) && (imm <= 8191)); - code |= reg1; //rd - code |= reg2 << 5; //rj - code |= (imm & 0x3fff)<<10;//si14 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0x3fff) << 10; // si14 } else if ((INS_beq <= ins) && (ins <= INS_bgeu)) { - //INS_beq - //INS_bne - //INS_blt - //INS_bltu - //INS_bge - //INS_bgeu + // INS_beq + // INS_bne + // INS_blt + // INS_bltu + // INS_bge + // INS_bgeu assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(!(imm & 0x3)); - assert((-32768 <= (imm>>2)) && ((imm>>2) <= 32767)); + assert((-32768 <= (imm >> 2)) && ((imm >> 2) <= 32767)); - code |= reg1 << 5; //rj - code |= reg2; //rd - code |= ((imm>>2) & 0xffff)<<10;//offs16 + code |= reg1 << 5; // rj + code |= reg2; // rd + code |= ((imm >> 2) & 0xffff) << 10; // offs16 } else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) { - //INS_fcmp_caf_s - //INS_fcmp_cun_s - //INS_fcmp_ceq_s - //INS_fcmp_cueq_s - //INS_fcmp_clt_s - //INS_fcmp_cult_s - //INS_fcmp_cle_s - //INS_fcmp_cule_s - //INS_fcmp_cne_s - //INS_fcmp_cor_s - //INS_fcmp_cune_s - //INS_fcmp_saf_d - //INS_fcmp_sun_d - //INS_fcmp_seq_d - //INS_fcmp_sueq_d - //INS_fcmp_slt_d - //INS_fcmp_sult_d - //INS_fcmp_sle_d - //INS_fcmp_sule_d - //INS_fcmp_sne_d - //INS_fcmp_sor_d - //INS_fcmp_sune_d - //INS_fcmp_caf_d - //INS_fcmp_cun_d - //INS_fcmp_ceq_d - //INS_fcmp_cueq_d - //INS_fcmp_clt_d - //INS_fcmp_cult_d - //INS_fcmp_cle_d - //INS_fcmp_cule_d - //INS_fcmp_cne_d - //INS_fcmp_cor_d - 
//INS_fcmp_cune_d - //INS_fcmp_saf_s - //INS_fcmp_sun_s - //INS_fcmp_seq_s - //INS_fcmp_sueq_s - //INS_fcmp_slt_s - //INS_fcmp_sult_s - //INS_fcmp_sle_s - //INS_fcmp_sule_s - //INS_fcmp_sne_s - //INS_fcmp_sor_s - //INS_fcmp_sune_s + // INS_fcmp_caf_s + // INS_fcmp_cun_s + // INS_fcmp_ceq_s + // INS_fcmp_cueq_s + // INS_fcmp_clt_s + // INS_fcmp_cult_s + // INS_fcmp_cle_s + // INS_fcmp_cule_s + // INS_fcmp_cne_s + // INS_fcmp_cor_s + // INS_fcmp_cune_s + // INS_fcmp_saf_d + // INS_fcmp_sun_d + // INS_fcmp_seq_d + // INS_fcmp_sueq_d + // INS_fcmp_slt_d + // INS_fcmp_sult_d + // INS_fcmp_sle_d + // INS_fcmp_sule_d + // INS_fcmp_sne_d + // INS_fcmp_sor_d + // INS_fcmp_sune_d + // INS_fcmp_caf_d + // INS_fcmp_cun_d + // INS_fcmp_ceq_d + // INS_fcmp_cueq_d + // INS_fcmp_clt_d + // INS_fcmp_cult_d + // INS_fcmp_cle_d + // INS_fcmp_cule_d + // INS_fcmp_cne_d + // INS_fcmp_cor_d + // INS_fcmp_cune_d + // INS_fcmp_saf_s + // INS_fcmp_sun_s + // INS_fcmp_seq_s + // INS_fcmp_sueq_s + // INS_fcmp_slt_s + // INS_fcmp_sult_s + // INS_fcmp_sle_s + // INS_fcmp_sule_s + // INS_fcmp_sne_s + // INS_fcmp_sor_s + // INS_fcmp_sune_s assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert((0 <= imm) && (imm <= 7)); - code |= (reg1 & 0x1f)<<5; //fj - code |= (reg2 & 0x1f)<<10; //fk - code |= imm & 0x7; //cc + code |= (reg1 & 0x1f) << 5; // fj + code |= (reg2 & 0x1f) << 10; // fk + code |= imm & 0x7; // cc } - else if (INS_addu16i_d == ins) { + else if (INS_addu16i_d == ins) + { assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-32768 <= imm) && (imm < 32768)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 0xffff)<<10;//si16 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // si16 } else if (INS_jirl == ins) { @@ -1524,9 +1534,9 @@ void emitter::emitIns_R_R_I( assert(isGeneralRegisterOrR0(reg2)); assert((-32768 <= imm) && (imm < 32768)); - code |= reg1; //rd - code |= reg2<<5; //rj - code |= (imm & 
0xffff)<<10;//offs16 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= (imm & 0xffff) << 10; // offs16 } else { @@ -1541,7 +1551,7 @@ void emitter::emitIns_R_R_I( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1556,7 +1566,7 @@ void emitter::emitIns_R_R_I( * - Requires that reg1 != reg2 */ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) -{//maybe optimize. +{ // maybe optimize. assert(isGeneralRegister(reg1)); assert(reg1 != reg2); @@ -1567,20 +1577,20 @@ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, re { case INS_addi_w: case INS_addi_d: - //case INS_lui: - //case INS_lbu: - //case INS_lhu: - //case INS_lwu: - //case INS_lb: - //case INS_lh: - //case INS_lw: + // case INS_lui: + // case INS_lbu: + // case INS_lhu: + // case INS_lwu: + // case INS_lb: + // case INS_lh: + // case INS_lw: case INS_ld_d: - //case INS_sb: - //case INS_sh: - //case INS_sw: - //case INS_sd: - ////case INS_lwc1: - ////case INS_ldc1: + // case INS_sb: + // case INS_sh: + // case INS_sw: + // case INS_sd: + ////case INS_lwc1: + ////case INS_ldc1: immFits = isValidSimm12(imm); break; @@ -1621,182 +1631,186 @@ void emitter::emitIns_R_R_R( { code_t code = emitInsCode(ins); - if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || ((INS_stx_b <= ins) && (ins <= INS_stle_d))) { - //case INS_add_w: - //case INS_add_d: - //case INS_sub_w: - //case INS_sub_d: - //case INS_and: - //case INS_or: - //case INS_nor: - //case INS_xor: - //case INS_andn: - //case INS_orn: - - //case INS_mul_w: - //case INS_mul_d: - //case INS_mulh_w: - //case INS_mulh_wu: - //case INS_mulh_d: - //case INS_mulh_du: - //case INS_mulw_d_w: - //case INS_mulw_d_wu: - //case INS_div_w: - //case INS_div_wu: - //case INS_div_d: - //case INS_div_du: - //case INS_mod_w: - //case INS_mod_wu: - //case INS_mod_d: - //case 
INS_mod_du: - - //case INS_sll_w: - //case INS_srl_w: - //case INS_sra_w: - //case INS_rotr_w: - //case INS_sll_d: - //case INS_srl_d: - //case INS_sra_d: - //case INS_rotr_d: - - //case INS_maskeqz: - //case INS_masknez: - - //case INS_slt: - //case INS_sltu: - - //case INS_ldx_b: - //case INS_ldx_h: - //case INS_ldx_w: - //case INS_ldx_d: - //case INS_ldx_bu: - //case INS_ldx_hu: - //case INS_ldx_wu: - //case INS_stx_b: - //case INS_stx_h: - //case INS_stx_w: - //case INS_stx_d: - - //case INS_ldgt_b: - //case INS_ldgt_h: - //case INS_ldgt_w: - //case INS_ldgt_d: - //case INS_ldle_b: - //case INS_ldle_h: - //case INS_ldle_w: - //case INS_ldle_d: - //case INS_stgt_b: - //case INS_stgt_h: - //case INS_stgt_w: - //case INS_stgt_d: - //case INS_stle_b: - //case INS_stle_h: - //case INS_stle_w: - //case INS_stle_d: - - //case INS_amswap_w: - //case INS_amswap_d: - //case INS_amswap_db_w: - //case INS_amswap_db_d: - //case INS_amadd_w: - //case INS_amadd_d: - //case INS_amadd_db_w: - //case INS_amadd_db_d: - //case INS_amand_w: - //case INS_amand_d: - //case INS_amand_db_w: - //case INS_amand_db_d: - //case INS_amor_w: - //case INS_amor_d: - //case INS_amor_db_w: - //case INS_amor_db_d: - //case INS_amxor_w: - //case INS_amxor_d: - //case INS_amxor_db_w: - //case INS_amxor_db_d: - //case INS_ammax_w: - //case INS_ammax_d: - //case INS_ammax_db_w: - //case INS_ammax_db_d: - //case INS_ammin_w: - //case INS_ammin_d: - //case INS_ammin_db_w: - //case INS_ammin_db_d: - //case INS_ammax_wu: - //case INS_ammax_du: - //case INS_ammax_db_wu: - //case INS_ammax_db_du: - //case INS_ammin_wu: - //case INS_ammin_du: - //case INS_ammin_db_wu: - //case INS_ammin_db_du: - - //case INS_crc_w_b_w: - //case INS_crc_w_h_w: - //case INS_crc_w_w_w: - //case INS_crc_w_d_w: - //case INS_crcc_w_b_w: - //case INS_crcc_w_h_w: - //case INS_crcc_w_w_w: - //case INS_crcc_w_d_w: + if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || + ((INS_stx_b <= 
ins) && (ins <= INS_stle_d))) + { + // case INS_add_w: + // case INS_add_d: + // case INS_sub_w: + // case INS_sub_d: + // case INS_and: + // case INS_or: + // case INS_nor: + // case INS_xor: + // case INS_andn: + // case INS_orn: + + // case INS_mul_w: + // case INS_mul_d: + // case INS_mulh_w: + // case INS_mulh_wu: + // case INS_mulh_d: + // case INS_mulh_du: + // case INS_mulw_d_w: + // case INS_mulw_d_wu: + // case INS_div_w: + // case INS_div_wu: + // case INS_div_d: + // case INS_div_du: + // case INS_mod_w: + // case INS_mod_wu: + // case INS_mod_d: + // case INS_mod_du: + + // case INS_sll_w: + // case INS_srl_w: + // case INS_sra_w: + // case INS_rotr_w: + // case INS_sll_d: + // case INS_srl_d: + // case INS_sra_d: + // case INS_rotr_d: + + // case INS_maskeqz: + // case INS_masknez: + + // case INS_slt: + // case INS_sltu: + + // case INS_ldx_b: + // case INS_ldx_h: + // case INS_ldx_w: + // case INS_ldx_d: + // case INS_ldx_bu: + // case INS_ldx_hu: + // case INS_ldx_wu: + // case INS_stx_b: + // case INS_stx_h: + // case INS_stx_w: + // case INS_stx_d: + + // case INS_ldgt_b: + // case INS_ldgt_h: + // case INS_ldgt_w: + // case INS_ldgt_d: + // case INS_ldle_b: + // case INS_ldle_h: + // case INS_ldle_w: + // case INS_ldle_d: + // case INS_stgt_b: + // case INS_stgt_h: + // case INS_stgt_w: + // case INS_stgt_d: + // case INS_stle_b: + // case INS_stle_h: + // case INS_stle_w: + // case INS_stle_d: + + // case INS_amswap_w: + // case INS_amswap_d: + // case INS_amswap_db_w: + // case INS_amswap_db_d: + // case INS_amadd_w: + // case INS_amadd_d: + // case INS_amadd_db_w: + // case INS_amadd_db_d: + // case INS_amand_w: + // case INS_amand_d: + // case INS_amand_db_w: + // case INS_amand_db_d: + // case INS_amor_w: + // case INS_amor_d: + // case INS_amor_db_w: + // case INS_amor_db_d: + // case INS_amxor_w: + // case INS_amxor_d: + // case INS_amxor_db_w: + // case INS_amxor_db_d: + // case INS_ammax_w: + // case INS_ammax_d: + // case 
INS_ammax_db_w: + // case INS_ammax_db_d: + // case INS_ammin_w: + // case INS_ammin_d: + // case INS_ammin_db_w: + // case INS_ammin_db_d: + // case INS_ammax_wu: + // case INS_ammax_du: + // case INS_ammax_db_wu: + // case INS_ammax_db_du: + // case INS_ammin_wu: + // case INS_ammin_du: + // case INS_ammin_db_wu: + // case INS_ammin_db_du: + + // case INS_crc_w_b_w: + // case INS_crc_w_h_w: + // case INS_crc_w_w_w: + // case INS_crc_w_d_w: + // case INS_crcc_w_b_w: + // case INS_crcc_w_h_w: + // case INS_crcc_w_w_w: + // case INS_crcc_w_d_w: assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); - code |= (reg1 /*& 0x1f*/); //rd - code |= (reg2 /*& 0x1f*/)<<5; //rj - code |= (reg3 /*& 0x1f*/)<<10;//rk + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + code |= (reg3 /*& 0x1f*/) << 10; // rk } - else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) { - //case INS_fadd_s: - //case INS_fadd_d: - //case INS_fsub_s: - //case INS_fsub_d: - //case INS_fmul_s: - //case INS_fmul_d: - //case INS_fdiv_s: - //case INS_fdiv_d: - //case INS_fmax_s: - //case INS_fmax_d: - //case INS_fmin_s: - //case INS_fmin_d: - //case INS_fmaxa_s: - //case INS_fmaxa_d: - //case INS_fmina_s: - //case INS_fmina_d: - //case INS_fscaleb_s: - //case INS_fscaleb_d: - //case INS_fcopysign_s: - //case INS_fcopysign_d: + else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) + { + // case INS_fadd_s: + // case INS_fadd_d: + // case INS_fsub_s: + // case INS_fsub_d: + // case INS_fmul_s: + // case INS_fmul_d: + // case INS_fdiv_s: + // case INS_fdiv_d: + // case INS_fmax_s: + // case INS_fmax_d: + // case INS_fmin_s: + // case INS_fmin_d: + // case INS_fmaxa_s: + // case INS_fmaxa_d: + // case INS_fmina_s: + // case INS_fmina_d: + // case INS_fscaleb_s: + // case INS_fscaleb_d: + // case INS_fcopysign_s: + // case INS_fcopysign_d: assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert(isFloatReg(reg3)); - code |= 
(reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj - code |= (reg3 & 0x1f)<<10;//fk + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk } - else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) { - //case INS_fldx_s: - //case INS_fldx_d: - //case INS_fstx_s: - //case INS_fstx_d: - - //case INS_fldgt_s: - //case INS_fldgt_d: - //case INS_fldle_s: - //case INS_fldle_d: - //case INS_fstgt_s: - //case INS_fstgt_d: - //case INS_fstle_s: - //case INS_fstle_d: + else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) + { + // case INS_fldx_s: + // case INS_fldx_d: + // case INS_fstx_s: + // case INS_fstx_d: + + // case INS_fldgt_s: + // case INS_fldgt_d: + // case INS_fldle_s: + // case INS_fldle_d: + // case INS_fstgt_s: + // case INS_fstgt_d: + // case INS_fstle_s: + // case INS_fstle_d: assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); - code |= reg1 & 0x1f; //fd - code |= reg2 << 5; //rj - code |= reg3 << 10; //rk + code |= reg1 & 0x1f; // fd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk } else { @@ -1812,7 +1826,7 @@ void emitter::emitIns_R_R_R( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1832,31 +1846,33 @@ void emitter::emitIns_R_R_R_I(instruction ins, { code_t code = emitInsCode(ins); - if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) { - //INS_alsl_w - //INS_alsl_wu - //INS_alsl_d - //INS_bytepick_w + if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) + { + // INS_alsl_w + // INS_alsl_wu + // INS_alsl_d + // INS_bytepick_w assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 3)); - code |= reg1; //rd - code |= reg2 << 5; //rj - code |= reg3 << 10;//rk - code |= (imm /*& 0x3*/)<<15; //sa2 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= (imm /*& 0x3*/) << 15; 
// sa2 } - else if (INS_bytepick_d == ins) { + else if (INS_bytepick_d == ins) + { assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= reg1; //rd - code |= reg2 << 5; //rj - code |= reg3 << 10;//rk - code |= (imm /*& 0x7*/)<<15; //sa3 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= (imm /*& 0x7*/) << 15; // sa3 } else if (INS_fsel == ins) { @@ -1865,10 +1881,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isFloatReg(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= (reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj - code |= (reg3 & 0x1f)<<10; //fk - code |= (imm /*& 0x7*/)<<15; //ca + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= (imm /*& 0x7*/) << 15; // ca } else { @@ -1884,7 +1900,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1902,7 +1918,7 @@ void emitter::emitIns_R_R_R_Ext(instruction ins, insOpts opt, /* = INS_OPTS_NONE */ int shiftAmount) /* = -1 -- unset */ { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } /***************************************************************************** @@ -1921,19 +1937,19 @@ void emitter::emitIns_R_R_I_I( { case INS_bstrins_w: case INS_bstrpick_w: - code |= (reg1 /*& 0x1f*/); //rd - code |= (reg2 /*& 0x1f*/)<<5; //rj - assert((0<=imm2) && (imm2<=imm1) && (imm1<32)); - code |= (imm1 & 0x1f)<<16; //msbw - code |= (imm2 & 0x1f)<<10; //lsbw + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 32)); + code |= (imm1 & 0x1f) << 16; // msbw + code |= (imm2 & 0x1f) << 10; // lsbw break; case INS_bstrins_d: case INS_bstrpick_d: - code |= (reg1 /*& 0x1f*/); //rd - code |= (reg2 /*& 
0x1f*/)<<5; //rj - assert((0<=imm2) && (imm2<=imm1) && (imm1<64)); - code |= (imm1 & 0x3f)<<16; //msbd - code |= (imm2 & 0x3f)<<10; //lsbd + code |= (reg1 /*& 0x1f*/); // rd + code |= (reg2 /*& 0x1f*/) << 5; // rj + assert((0 <= imm2) && (imm2 <= imm1) && (imm1 < 64)); + code |= (imm1 & 0x3f) << 16; // msbd + code |= (imm2 & 0x3f) << 10; // lsbd break; default: unreached(); @@ -1947,7 +1963,7 @@ void emitter::emitIns_R_R_I_I( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -1961,7 +1977,7 @@ void emitter::emitIns_R_R_R_R( { code_t code = emitInsCode(ins); -//#ifdef DEBUG + //#ifdef DEBUG switch (ins) { case INS_fmadd_s: @@ -1977,15 +1993,15 @@ void emitter::emitIns_R_R_R_R( assert(isFloatReg(reg3)); assert(isFloatReg(reg4)); - code |= (reg1 & 0x1f); //fd - code |= (reg2 & 0x1f)<<5; //fj - code |= (reg3 & 0x1f)<<10; //fk - code |= (reg4 & 0x1f)<<15; //fa + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= (reg4 & 0x1f) << 15; // fa break; default: unreached(); } -//#endif + //#endif instrDesc* id = emitNewInstr(attr); @@ -1994,7 +2010,7 @@ void emitter::emitIns_R_R_R_R( id->idAddr()->iiaSetInstrEncode(code); id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2007,7 +2023,7 @@ void emitter::emitIns_R_R_R_R( void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_C"); #endif @@ -2020,7 +2036,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_S"); #endif @@ -2064,7 +2080,7 @@ void emitter::emitIns_R_R_S( void emitter::emitIns_R_R_S_S( instruction ins, emitAttr attr1, emitAttr attr2, 
regNumber reg1, regNumber reg2, int varx, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } /***************************************************************************** @@ -2074,7 +2090,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_S_S_R_R( instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); } /***************************************************************************** @@ -2083,7 +2099,7 @@ assert(!"unimplemented on LOONGARCH yet"); */ void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_S_I"); #endif @@ -2100,14 +2116,14 @@ void emitter::emitIns_R_C( instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) { assert(offs >= 0); - assert(instrDesc::fitsInSmallCns(offs));//can optimize. - //assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). - //assert(isGeneralRegister(reg)); while load float the reg is FPR. + assert(instrDesc::fitsInSmallCns(offs)); // can optimize. + // assert(ins == INS_bl);//for special. indicating isGeneralRegister(reg). + // assert(isGeneralRegister(reg)); while load float the reg is FPR. - //when id->idIns == bl, for reloc! 4-ins. + // when id->idIns == bl, for reloc! 4-ins. // pcaddu12i reg, off-hi-20bits // addi_d reg, reg, off-lo-12bits - //when id->idIns == load-ins, for reloc! 4-ins. + // when id->idIns == load-ins, for reloc! 4-ins. // pcaddu12i reg, off-hi-20bits // load reg, offs_lo-12bits(reg) #when ins is load ins. // @@ -2124,17 +2140,18 @@ void emitter::emitIns_R_C( instrDesc* id = emitNewInstr(attr); id->idIns(ins); - assert(reg != REG_R0); //for special. reg Must not be R0. 
- id->idReg1(reg); // destination register that will get the constant value. + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. - id->idSmallCns(offs); //usually is 0. + id->idSmallCns(offs); // usually is 0. id->idInsOpt(INS_OPTS_RC); if (emitComp->opts.compReloc) { id->idSetIsDspReloc(); id->idCodeSize(8); - } else - id->idCodeSize(12);//TODO: maybe optimize. + } + else + id->idCodeSize(12); // TODO: maybe optimize. if (EA_IS_GCREF(attr)) { @@ -2149,15 +2166,15 @@ void emitter::emitIns_R_C( id->idOpSize(EA_PTRSIZE); } - //TODO: this maybe deleted. + // TODO: this maybe deleted. id->idSetIsBound(); // We won't patch address since we will know the exact distance // once JIT code and data are allocated together. - assert(addrReg == REG_NA);//NOTE: for LOONGARCH64, not support addrReg != REG_NA. + assert(addrReg == REG_NA); // NOTE: for LOONGARCH64, not support addrReg != REG_NA. id->idAddr()->iiaFieldHnd = fldHnd; - //dispIns(id);//loongarch dumping instr by other-fun. + // dispIns(id);//loongarch dumping instr by other-fun. 
appendToCurIG(id); } @@ -2168,7 +2185,7 @@ void emitter::emitIns_R_C( void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_C_I"); #endif @@ -2181,7 +2198,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 assert(!"emitIns_C_R not supported for RyuJIT backend"); #endif @@ -2189,7 +2206,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_AR"); #endif @@ -2201,8 +2218,8 @@ void emitter::emitIns_R_AI(instruction ins, regNumber reg, ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) { - assert(EA_IS_RELOC(attr));//EA_PTR_DSP_RELOC - assert(ins == INS_bl);//for special. + assert(EA_IS_RELOC(attr)); // EA_PTR_DSP_RELOC + assert(ins == INS_bl); // for special. assert(isGeneralRegister(reg)); // INS_OPTS_RELOC: placeholders. 2-ins: @@ -2216,8 +2233,8 @@ void emitter::emitIns_R_AI(instruction ins, instrDesc* id = emitNewInstr(attr); id->idIns(ins); - assert(reg != REG_R0); //for special. reg Must not be R0. - id->idReg1(reg); // destination register that will get the constant value. + assert(reg != REG_R0); // for special. reg Must not be R0. + id->idReg1(reg); // destination register that will get the constant value. id->idInsOpt(INS_OPTS_RELOC); @@ -2237,13 +2254,13 @@ void emitter::emitIns_R_AI(instruction ins, id->idAddr()->iiaAddr = (BYTE*)addr; id->idCodeSize(8); - //dispIns(id);//loongarch dumping instr by other-fun. 
+ // dispIns(id);//loongarch dumping instr by other-fun. appendToCurIG(id); } void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_AR_R"); #endif @@ -2251,7 +2268,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_ARR"); #endif @@ -2259,7 +2276,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_ARR"); #endif @@ -2268,7 +2285,7 @@ assert(!"unimplemented on LOONGARCH yet"); void emitter::emitIns_R_ARX( instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NYI("emitIns_R_ARR"); #endif @@ -2296,7 +2313,7 @@ void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, reg */ void emitter::emitSetShortJump(instrDescJmp* id) { -/* TODO: maybe delete it on future. */ + /* TODO: maybe delete it on future. */ return; } @@ -2309,11 +2326,11 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu { assert(dst->bbFlags & BBF_HAS_LABEL); - //if for reloc! 4-ins: + // if for reloc! 
4-ins: // pcaddu12i reg, offset-hi20 // addi_d reg, reg, offset-lo12 // - //else: 3-ins: + // else: 3-ins: // lu12i_w reg, dst-hi-20bits // ori reg, reg, dst-lo-12bits // bstrins_d reg, zero, msbd, lsbd / lu32i_d reg, 0xff @@ -2328,7 +2345,8 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu { id->idSetIsDspReloc(); id->idCodeSize(8); - } else + } + else id->idCodeSize(12); id->idReg1(reg); @@ -2354,31 +2372,32 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu } #endif // DEBUG - //dispIns(id); + // dispIns(id); appendToCurIG(id); } void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) { - assert(!"unimplemented on LOONGARCH yet: emitIns_J_R.");//not used. + assert(!"unimplemented on LOONGARCH yet: emitIns_J_R."); // not used. } -//NOTE: +// NOTE: // For loongarch64, emitIns_J is just only jump, not include the condition branch! // The condition branch is the emitIns_J_cond_la(). -// If using "BasicBlock* dst" lable as target, the INS_OPTS_J is a short jump while long jump will be replace by INS_OPTS_JIRL. +// If using "BasicBlock* dst" lable as target, the INS_OPTS_J is a short jump while long jump will be replace by +// INS_OPTS_JIRL. // // The arg "instrCount" is two regs's encoding when ins is beq/bne/blt/bltu/bge/bgeu/beqz/bnez. void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) { if (dst == nullptr) - {//Now this case not used for loongarch64. + { // Now this case not used for loongarch64. assert(instrCount != 0); - assert(ins == INS_b);//when dst==nullptr, ins is INS_b by now. + assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now. #if 1 - assert((-33554432 <= instrCount) && (instrCount < 33554432));//0x2000000. - emitIns_I(ins, EA_PTRSIZE, instrCount << 2);//NOTE: instrCount is the number of the instructions. + assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000. 
+ emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions. #else instrCount = instrCount << 2; if ((-33554432 <= instrCount) && (instrCount < 33554432)) @@ -2388,22 +2407,22 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) } else { - //NOTE: should not be here !!! + // NOTE: should not be here !!! assert(!"should not be here on LOONGARCH64 !!!"); - //emitIns_I(INS_bl, EA_PTRSIZE, 4); + // emitIns_I(INS_bl, EA_PTRSIZE, 4); - //ssize_t imm = ((ssize_t)instrCount>>12); - //assert(isValidSimm12(imm)); - //emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); - //imm = (instrCount & 0xfffff); - //emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm); + // ssize_t imm = ((ssize_t)instrCount>>12); + // assert(isValidSimm12(imm)); + // emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); + // imm = (instrCount & 0xfffff); + // emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm); - //emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA); - //emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0); + // emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA); + // emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0); } #endif - return ; + return; } // (dst != nullptr) @@ -2417,7 +2436,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) assert((INS_bceqz <= ins) && (ins <= INS_bl)); id->idIns(ins); id->idReg1((regNumber)(instrCount & 0x1f)); - id->idReg2((regNumber)((instrCount >> 5 ) & 0x1f)); + id->idReg2((regNumber)((instrCount >> 5) & 0x1f)); id->idInsOpt(INS_OPTS_J); emitCounts_INS_OPTS_J++; @@ -2442,7 +2461,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) id->idjOffs = emitCurIGsize; /* Append this jump to this IG's jump list */ - id->idjNext = emitCurIGjmpList; + id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #if EMITTER_STATS @@ -2450,17 +2469,17 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int 
instrCount) #endif id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } -//NOTE: +// NOTE: // For loongarch64, emitIns_J_cond_la() is the condition branch. // NOTE: Only supported short branch so far !!! // void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) { - //TODO: + // TODO: // Now the emitIns_J_cond_la() is only the short condition branch. // There is no long condition branch for loongarch64 so far. // For loongarch64, the long condition branch is like this: @@ -2499,7 +2518,7 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 id->idjOffs = emitCurIGsize; /* Append this jump to this IG's jump list */ - id->idjNext = emitCurIGjmpList; + id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #if EMITTER_STATS @@ -2507,7 +2526,7 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 #endif id->idCodeSize(4); - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2515,47 +2534,56 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) { assert(!EA_IS_RELOC(size)); assert(isGeneralRegister(reg)); - //size = EA_SIZE(size); + // size = EA_SIZE(size); - if (-1 == (imm >> 11) || 0 == (imm >> 11)) { + if (-1 == (imm >> 11) || 0 == (imm >> 11)) + { emitIns_R_R_I(INS_addi_w, size, reg, REG_R0, imm); return; } - if (0 == (imm >> 12)) { + if (0 == (imm >> 12)) + { emitIns_R_R_I(INS_ori, size, reg, REG_R0, imm); return; } instrDesc* id = emitNewInstr(size); - if ((imm == INT64_MAX) || (imm == 0xffffffff)) { - //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); - //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); + if ((imm == INT64_MAX) || (imm == 0xffffffff)) + { + // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); + // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); id->idReg2((regNumber)1); // special for INT64_MAX(ui6=1) or UINT32_MAX(ui6=32); id->idCodeSize(8); - } else if (-1 == (imm >> 31) || 0 == (imm >> 31)) { - 
//emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); - //emitIns_R_R_I(INS_ori, size, reg, reg, imm); + } + else if (-1 == (imm >> 31) || 0 == (imm >> 31)) + { + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); id->idCodeSize(8); - } else if (-1 == (imm >> 51) || 0 == (imm >> 51)) { + } + else if (-1 == (imm >> 51) || 0 == (imm >> 51)) + { // low-32bits. - //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12); - //emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); // // high-20bits. - //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); id->idCodeSize(12); - } else {// 0xffff ffff ffff ffff. + } + else + { // 0xffff ffff ffff ffff. // low-32bits. - //emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); - //emitIns_R_R_I(INS_ori, size, reg, reg, imm); + // emitIns_R_I(INS_lu12i_w, size, reg, (imm >> 12)); + // emitIns_R_R_I(INS_ori, size, reg, reg, imm); // // high-32bits. 
- //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); - //emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52)); + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + // emitIns_R_R_I(INS_lu52i_d, size, reg, reg, (imm>>52)); id->idCodeSize(16); } @@ -2568,7 +2596,7 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) id->idAddr()->iiaAddr = (BYTE*)imm; - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2593,10 +2621,9 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) void emitter::emitIns_Call(EmitCallType callType, CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), VARSET_VALARG_TP ptrVars, regMaskTP gcrefRegs, regMaskTP byrefRegs, @@ -2610,8 +2637,7 @@ void emitter::emitIns_Call(EmitCallType callType, /* Sanity check the arguments depending on callType */ assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || - (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); + assert((callType != EC_FUNC_TOKEN) || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0)); assert(callType < EC_INDIR_R || addr == NULL); assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); @@ -2691,7 +2717,7 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsOpt(INS_OPTS_C); - //TODO: maybe optimize. + // TODO: maybe optimize. // INS_OPTS_C: placeholders. 
1/2/4-ins: // if (callType == EC_INDIR_R) @@ -2712,13 +2738,13 @@ void emitter::emitIns_Call(EmitCallType callType, if (callType == EC_INDIR_R) { /* This is an indirect call (either a virtual call or func ptr call) */ - //assert(callType == EC_INDIR_R); + // assert(callType == EC_INDIR_R); id->idSetIsCallRegPtr(); regNumber reg_jirl = isJump ? REG_R0 : REG_RA; id->idReg4(reg_jirl); - id->idReg3(ireg);//NOTE: for EC_INDIR_R, using idReg3. + id->idReg3(ireg); // NOTE: for EC_INDIR_R, using idReg3. assert(xreg == REG_NA); id->idCodeSize(4); @@ -2731,14 +2757,16 @@ void emitter::emitIns_Call(EmitCallType callType, assert(addr != NULL); assert(((long)addr & 3) == 0); - addr = (void*)((long)addr + (isJump ? 0 : 1));//NOTE: low-bit0 is used for jirl ra/r0,rd,0 + addr = (void*)((long)addr + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 id->idAddr()->iiaAddr = (BYTE*)addr; if (emitComp->opts.compReloc) { id->idSetIsDspReloc(); id->idCodeSize(8); - } else { + } + else + { id->idCodeSize(16); } } @@ -2764,7 +2792,7 @@ void emitter::emitIns_Call(EmitCallType callType, } #endif // LATE_DISASM - //dispIns(id); + // dispIns(id); appendToCurIG(id); } @@ -2776,8 +2804,8 @@ void emitter::emitIns_Call(EmitCallType callType, unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) { unsigned char callInstrSize = sizeof(code_t); // 4 bytes - regMaskTP gcrefRegs; - regMaskTP byrefRegs; + regMaskTP gcrefRegs; + regMaskTP byrefRegs; VARSET_TP GCvars(VarSetOps::UninitVal()); @@ -2807,17 +2835,17 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t emitUpdateLiveGCvars(GCvars, dst); #ifdef DEBUG - //NOTEADD: + // NOTEADD: // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. 
if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) { - emitDispGCVarDelta(); //define in emit.cpp + emitDispGCVarDelta(); // define in emit.cpp } #endif // DEBUG assert(id->idIns() == INS_jirl); if (id->idIsCallRegPtr()) - {//EC_INDIR_R + { // EC_INDIR_R code = emitInsCode(id->idIns()); D_INST_JIRL(code, id->idReg4(), id->idReg3(), 0); } @@ -2828,15 +2856,15 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t // pcaddu18i t2, addr-hi20 // jilr r0/1,t2,addr-lo18 - long addr = (long)id->idAddr()->iiaAddr;//get addr. - //should assert(addr-dst < 38bits); + long addr = (long)id->idAddr()->iiaAddr; // get addr. + // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; - addr = addr ^ 1; + addr = addr ^ 1; emitRecordRelocation(dst, (BYTE*)addr, IMAGE_REL_LOONGARCH64_PC); - *(code_t *)dst = 0x1e00000e; + *(code_t*)dst = 0x1e00000e; dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); @@ -2845,37 +2873,37 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code = emitInsCode(INS_jirl); assert(code == 0x4c000000); #endif - *(code_t *)dst = 0x4c000000 | (14<<5) | reg2; + *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2; } else { - // lu12i_w t2, dst_offset_lo32-hi //TODO: maybe optimize. - // ori t2, t2, dst_offset_lo32-lo - // lu32i_d t2, dst_offset_hi32-lo - // jirl t2 + // lu12i_w t2, dst_offset_lo32-hi //TODO: maybe optimize. + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl t2 ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); - //assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff. - assert((imm >> 32) == 0xff);//for LA64 addr-is 0xff. but this is not the best !!! + // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff. + assert((imm >> 32) == 0xff); // for LA64 addr-is 0xff. but this is not the best !!! 
int reg2 = (int)(imm & 1); imm -= reg2; code = emitInsCode(INS_lu12i_w); D_INST_lu12i_w(code, REG_T2, imm >> 12); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, REG_T2, REG_T2, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; - //emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32); + // emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32); code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, REG_T2, imm >> 32); + // D_INST_lu32i_d(code, REG_T2, imm >> 32); D_INST_lu32i_d(code, REG_T2, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_jirl); @@ -2958,7 +2986,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t } else { - callInstrSize = id->idIsReloc()? (2 << 2) : (4 << 2);// INS_OPTS_C: 2/4-ins. + callInstrSize = id->idIsReloc() ? (2 << 2) : (4 << 2); // INS_OPTS_C: 2/4-ins. } return callInstrSize; @@ -2972,7 +3000,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t /*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) { assert(sizeof(code_t) == 4); - BYTE* dstRW = dst + writeableOffset; + BYTE* dstRW = dst + writeableOffset; *((code_t*)dstRW) = code; return sizeof(code_t); @@ -2989,11 +3017,11 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { - BYTE* dst = *dp; - BYTE* dst2 = dst;//addr for updating gc info if needed. - code_t code = 0; + BYTE* dst = *dp; + BYTE* dst2 = dst; // addr for updating gc info if needed. 
+ code_t code = 0; instruction ins; - size_t sz;// = emitSizeOfInsDsc(id); + size_t sz; // = emitSizeOfInsDsc(id); #ifdef DEBUG #if DUMP_GC_TABLES @@ -3022,7 +3050,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitRecordRelocation(dst, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); - *(code_t *)dst = 0x1c000000 | (code_t)reg1; + *(code_t*)dst = 0x1c000000 | (code_t)reg1; dst += 4; dst2 = dst; @@ -3037,14 +3065,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (id->idIsCnsReloc()) { - ins = INS_addi_d; - *(code_t *)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1<<5); + ins = INS_addi_d; + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5); } - else //if (id->idIsDspReloc()) + else // if (id->idIsDspReloc()) { assert(id->idIsDspReloc()); - ins = INS_ldptr_d; - *(code_t *)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1<<5); + ins = INS_ldptr_d; + *(code_t*)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1 << 5); } if (id->idGCref() != GCT_NONE) @@ -3058,114 +3086,116 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_I: { - ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); + ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); regNumber reg1 = id->idReg1(); - dst2 += 4;//assert(dst2 == dst); + dst2 += 4; // assert(dst2 == dst); switch (id->idCodeSize()) { - case 8://if (id->idCodeSize() == 8) - { - if (id->idReg2()) { // special for INT64_MAX or UINT32_MAX; - code = emitInsCode(INS_addi_d); - //emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); - D_INST_2RI12(code, reg1, REG_R0, -1); - *(code_t *)dst = code; - dst += 4; - - ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; - code = emitInsCode(INS_srli_d); - //emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); - code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); - *(code_t *)dst = code; + case 8: // if (id->idCodeSize() == 8) + { + if (id->idReg2()) + { // special for INT64_MAX or UINT32_MAX; + code = emitInsCode(INS_addi_d); + // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); + D_INST_2RI12(code, reg1, REG_R0, -1); + *(code_t*)dst = code; + dst += 4; + + ssize_t ui6 = (imm == INT64_MAX) ? 1 : 32; + code = emitInsCode(INS_srli_d); + // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); + code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); + *(code_t*)dst = code; + } + else + { + code = emitInsCode(INS_lu12i_w); + D_INST_lu12i_w(code, reg1, imm >> 12); + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_ori); + D_INST_ori(code, reg1, reg1, imm); + *(code_t*)dst = code; + } + break; } - else { + case 12: // else if (id->idCodeSize() == 12) + { code = emitInsCode(INS_lu12i_w); D_INST_lu12i_w(code, reg1, imm >> 12); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, reg1, reg1, imm); - *(code_t *)dst = code; - } - break; - } - case 12: //else if (id->idCodeSize() == 12) - { - code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); - *(code_t *)dst = code; - dst += 4; - - code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); - *(code_t *)dst = code; - dst += 4; + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_lu32i_d); - //emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); - D_INST_lu32i_d(code, reg1, imm >> 32); - *(code_t *)dst = code; + code = emitInsCode(INS_lu32i_d); + // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); + D_INST_lu32i_d(code, reg1, imm >> 32); + *(code_t*)dst = code; - break; - } - case 16://else if (id->idCodeSize() == 16) - { - code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); - *(code_t *)dst = code; - dst += 4; + 
break; + } + case 16: // else if (id->idCodeSize() == 16) + { + code = emitInsCode(INS_lu12i_w); + D_INST_lu12i_w(code, reg1, imm >> 12); + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); - *(code_t *)dst = code; - dst += 4; + code = emitInsCode(INS_ori); + D_INST_ori(code, reg1, reg1, imm); + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_lu32i_d); - D_INST_lu32i_d(code, reg1, imm >> 32); - *(code_t *)dst = code; - dst += 4; + code = emitInsCode(INS_lu32i_d); + D_INST_lu32i_d(code, reg1, imm >> 32); + *(code_t*)dst = code; + dst += 4; - code = emitInsCode(INS_lu52i_d); - D_INST_lu52i_d(code, reg1, reg1, imm >> 52); - *(code_t *)dst = code; + code = emitInsCode(INS_lu52i_d); + D_INST_lu52i_d(code, reg1, reg1, imm >> 52); + *(code_t*)dst = code; - break; - } - default : - unreached(); - break; + break; + } + default: + unreached(); + break; } ins = INS_ori; dst += 4; - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_RC: { // Reference to JIT data - //when id->idIns == bl, for reloc! + // when id->idIns == bl, for reloc! // pcaddu12i r21, off-hi-20bits // addi_d reg, r21, off-lo-12bits - //when id->idIns == load-ins + // when id->idIns == load-ins // pcaddu12i r21, off-hi-20bits // load reg, offs_lo-12bits(r21) #when ins is load ins. // - //when id->idIns == bl + // when id->idIns == bl // lu12i_w r21, addr-hi-20bits // ori reg, r21, addr-lo-12bits // lu32i_d reg, addr_hi-32bits // - //when id->idIns == load-ins + // when id->idIns == load-ins // lu12i_w r21, offs_hi-20bits // lu32i_d r21, 0xff addr_hi-32bits // load reg, addr_lo-12bits(r21) @@ -3182,12 +3212,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert(dataOffs < emitDataSize()); - ins = id->idIns(); + ins = id->idIns(); regNumber reg1 = id->idReg1(); if (id->idIsReloc()) { - //get the addr-offset of the data. + // get the addr-offset of the data. 
imm = (ssize_t)emitConsBlock - (ssize_t)dst + dataOffs; assert(imm > 0); assert(!(imm & 3)); @@ -3196,14 +3226,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) imm += doff; assert(isValidSimm20(imm >> 12)); - doff = (int)(imm & 0x7ff) - doff;//addr-lo-12bit. + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. #ifdef DEBUG code = emitInsCode(INS_pcaddu12i); assert(code == 0x1c000000); #endif - code = 0x1c000000 | 21; - *(code_t *)dst = code | (((code_t)imm & 0xfffff000) >> 7); + code = 0x1c000000 | 21; + *(code_t*)dst = code | (((code_t)imm & 0xfffff000) >> 7); dst += 4; if (ins == INS_bl) @@ -3214,92 +3244,92 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(INS_addi_d); assert(code == 0x02c00000); #endif - code = 0x02c00000 | (21<<5); - *(code_t *)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10); + code = 0x02c00000 | (21 << 5); + *(code_t*)dst = code | (code_t)reg1 | (((code_t)doff & 0xfff) << 10); } else { code = emitInsCode(ins); - D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff);//NOTE:here must be REG_R21 !!! - *(code_t *)dst = code; + D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); // NOTE:here must be REG_R21 !!! + *(code_t*)dst = code; } dst += 4; dst2 = dst; } else { - //get the addr of the data. + // get the addr of the data. 
imm = (ssize_t)emitConsBlock + dataOffs; code = emitInsCode(INS_lu12i_w); if (ins == INS_bl) { assert((imm >> 32) == 0xff); - //assert((imm >> 32) <= 0x7ffff); + // assert((imm >> 32) <= 0x7ffff); doff = (int)imm >> 12; D_INST_lu12i_w(code, REG_R21, doff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, reg1, REG_R21, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; dst2 = dst; - ins = INS_lu32i_d; + ins = INS_lu32i_d; code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, reg1, imm >> 32); + // D_INST_lu32i_d(code, reg1, imm >> 32); D_INST_lu32i_d(code, reg1, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; } else { doff = (int)(imm & 0x800); imm += doff; - doff = (int)(imm & 0x7ff) - doff;//addr-lo-12bit. + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. assert((imm >> 32) == 0xff); - //assert((imm >> 32) <= 0x7ffff); + // assert((imm >> 32) <= 0x7ffff); - dataOffs = (unsigned)(imm >> 12); //addr-hi-20bits. + dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits. D_INST_lu12i_w(code, REG_R21, dataOffs); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; - //emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32); + // emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32); code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, REG_R21, imm >> 32); + // D_INST_lu32i_d(code, REG_R21, imm >> 32); D_INST_lu32i_d(code, REG_R21, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(ins); D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; dst2 = dst; } } - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_RL: { - //if for reloc! + // if for reloc! // pcaddu12i reg, offset-hi20 // addi_d reg, reg, offset-lo12 // - //else: ////TODO:optimize. + // else: ////TODO:optimize. 
// lu12i_w reg, dst-hi-12bits // ori reg, reg, dst-lo-12bits // lu32i_d reg, dst-hi-32bits - insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); + insGroup* tgtIG = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel); id->idAddr()->iiaIGlabel = tgtIG; regNumber reg1 = id->idReg1(); @@ -3308,17 +3338,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (id->idIsReloc()) { ssize_t imm = (ssize_t)tgtIG->igOffs; - imm = (ssize_t)emitCodeBlock + imm - (ssize_t)dst; + imm = (ssize_t)emitCodeBlock + imm - (ssize_t)dst; assert((imm & 3) == 0); int doff = (int)(imm & 0x800); imm += doff; assert(isValidSimm20(imm >> 12)); - doff = (int)(imm & 0x7ff) - doff;//addr-lo-12bit. + doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. - code = 0x1c000000; - *(code_t *)dst = code | (code_t)reg1 | ((imm & 0xfffff000)>>7); + code = 0x1c000000; + *(code_t*)dst = code | (code_t)reg1 | ((imm & 0xfffff000) >> 7); dst += 4; dst2 = dst; #ifdef DEBUG @@ -3327,207 +3357,208 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(INS_addi_d); assert(code == 0x02c00000); #endif - *(code_t *)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1<<5) | ((doff & 0xfff)<<10); - ins = INS_addi_d; - } else + *(code_t*)dst = 0x02c00000 | (code_t)reg1 | ((code_t)reg1 << 5) | ((doff & 0xfff) << 10); + ins = INS_addi_d; + } + else { ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; - //assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff + // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff assert((imm >> 32) == 0xff); code = emitInsCode(INS_lu12i_w); D_INST_lu12i_w(code, REG_R21, imm >> 12); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); D_INST_ori(code, reg1, REG_R21, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; dst2 = dst; ins = INS_lu32i_d; - //emitIns_R_I(INS_lu32i_d, size, reg1, 0xff); + // emitIns_R_I(INS_lu32i_d, size, reg1, 
0xff); code = emitInsCode(INS_lu32i_d); - //D_INST_lu32i_d(code, reg1, imm >> 32); + // D_INST_lu32i_d(code, reg1, imm >> 32); D_INST_lu32i_d(code, reg1, 0xff); - *(code_t *)dst = code; + *(code_t*)dst = code; } dst += 4; - sz = sizeof(instrDesc); + sz = sizeof(instrDesc); } - break; + break; case INS_OPTS_JIRL: - // case_1: <----------from INS_OPTS_J: - // xor r21,reg1,reg2 | bne/beq _next | bcnez/bceqz _next - // bnez/beqz dst | b dst | b dst - //_next: - // - // case_2: <---------- TODO: from INS_OPTS_J: - // bnez/beqz _next: - // pcaddi r21,off-hi - // jirl r0,r21,off-lo - //_next: - // - // case_3: <----------INS_OPTS_JIRL: //not used by now !!! - // b dst - // - // case_4: <----------INS_OPTS_JIRL: //not used by now !!! - // pcaddi r21,off-hi - // jirl r0,r21,off-lo - // - { - instrDescJmp* jmp = (instrDescJmp*) id; - - regNumber reg1 = id->idReg1(); + // case_1: <----------from INS_OPTS_J: + // xor r21,reg1,reg2 | bne/beq _next | bcnez/bceqz _next + // bnez/beqz dst | b dst | b dst + //_next: + // + // case_2: <---------- TODO: from INS_OPTS_J: + // bnez/beqz _next: + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + //_next: + // + // case_3: <----------INS_OPTS_JIRL: //not used by now !!! + // b dst + // + // case_4: <----------INS_OPTS_JIRL: //not used by now !!! + // pcaddi r21,off-hi + // jirl r0,r21,off-lo + // { - ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); - imm -= 4; + instrDescJmp* jmp = (instrDescJmp*)id; - ins = jmp->idIns(); - assert(jmp->idCodeSize() > 4); //The original INS_OPTS_JIRL: not used by now!!! - switch (jmp->idCodeSize()) + regNumber reg1 = id->idReg1(); { - case 8: + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); + imm -= 4; + + ins = jmp->idIns(); + assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! 
+ switch (jmp->idCodeSize()) { - regNumber reg2 = id->idReg2(); - assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); - //assert((INS_bceqz <= ins) && (ins <= INS_bl));//TODO - if ((INS_beq == ins) || (INS_bne == ins)) + case 8: { - if ((-0x400000 <= imm) && (imm < 0x400000)) + regNumber reg2 = id->idReg2(); + assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); + // assert((INS_bceqz <= ins) && (ins <= INS_bl));//TODO + if ((INS_beq == ins) || (INS_bne == ins)) { - code = emitInsCode(INS_xor); - D_INST_3R(code, REG_R21, reg1, reg2); - *(code_t *)dst = code; + if ((-0x400000 <= imm) && (imm < 0x400000)) + { + code = emitInsCode(INS_xor); + D_INST_3R(code, REG_R21, reg1, reg2); + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(ins == INS_beq ? INS_beqz : INS_bnez); + D_INST_Bcond_Z(code, REG_R21, imm); + *(code_t*)dst = code; + dst += 4; + } + else // if ((-0x8000000 <= imm) && (imm < 0x8000000)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bne & 0xfffe) == INS_beq); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= 0x800; + *(code_t*)dst = code; + dst += 4; + + code = emitInsCode(INS_b); + D_INST_B(code, imm); + *(code_t*)dst = code; + dst += 4; + } + // else + // unreached(); + } + else if ((INS_bceqz == ins) || (INS_bcnez == ins)) + { + assert((-0x8000000 <= imm) && (imm < 0x8000000)); + assert((INS_bcnez & 0xfffe) == INS_bceqz); + + code = emitInsCode((instruction)((int)ins ^ 0x1)); + code |= ((code_t)reg1) << 5; /* rj */ + code |= 0x800; + *(code_t*)dst = code; dst += 4; - code = emitInsCode(ins == INS_beq ? 
INS_beqz : INS_bnez); - D_INST_Bcond_Z(code, REG_R21, imm); - *(code_t *)dst = code; + code = emitInsCode(INS_b); + D_INST_B(code, imm); + *(code_t*)dst = code; dst += 4; } - else //if ((-0x8000000 <= imm) && (imm < 0x8000000)) + else if ((INS_blt <= ins) && (ins <= INS_bgeu)) { assert((-0x8000000 <= imm) && (imm < 0x8000000)); - assert((INS_bne & 0xfffe) == INS_beq); + assert((INS_bge & 0xfffe) == INS_blt); + assert((INS_bgeu & 0xfffe) == INS_bltu); code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)(reg1) /*& 0x1f */)<<5; /* rj */ - code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ + code |= ((code_t)(reg1) /*& 0x1f */) << 5; /* rj */ + code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ code |= 0x800; - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_b); D_INST_B(code, imm); - *(code_t *)dst = code; + *(code_t*)dst = code; dst += 4; } - //else - // unreached(); - } - else if ((INS_bceqz == ins) || (INS_bcnez == ins)) - { - assert((-0x8000000 <= imm) && (imm < 0x8000000)); - assert((INS_bcnez & 0xfffe) == INS_bceqz); - - code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)reg1)<<5; /* rj */ - code |= 0x800; - *(code_t *)dst = code; - dst += 4; - - code = emitInsCode(INS_b); - D_INST_B(code, imm); - *(code_t *)dst = code; - dst += 4; - } - else if ((INS_blt <= ins) && (ins <= INS_bgeu)) - { - assert((-0x8000000 <= imm) && (imm < 0x8000000)); - assert((INS_bge & 0xfffe) == INS_blt); - assert((INS_bgeu & 0xfffe) == INS_bltu); - - code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)(reg1) /*& 0x1f */)<<5; /* rj */ - code |= ((code_t)(reg2) /*& 0x1f */); /* rd */ - code |= 0x800; - *(code_t *)dst = code; - dst += 4; - - code = emitInsCode(INS_b); - D_INST_B(code, imm); - *(code_t *)dst = code; - dst += 4; + break; } - break; + // case 12: + default: + unreached(); + break; } - //case 12: - default : - unreached(); - break; } + sz = sizeof(instrDescJmp); } - sz = sizeof(instrDescJmp); - } 
break; case INS_OPTS_J_cond: // b_cond dst-relative. // - //NOTE: + // NOTE: // the case "imm > 0x7fff" not supported. // More info within the emitter::emitIns_J_cond_la(); - { - ssize_t imm = (ssize_t) id->idAddr()->iiaGetJmpOffset();//get jmp's offset relative delay-slot. - assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); - assert(!(imm & 3)); + { + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + assert((OFFSET_DIST_SMALL_MAX_NEG << 2) <= imm && imm <= (OFFSET_DIST_SMALL_MAX_POS << 2)); + assert(!(imm & 3)); - ins = id->idIns(); - code = emitInsCode(ins); - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); - *(code_t *)dst = code; - dst += 4; + ins = id->idIns(); + code = emitInsCode(ins); + D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + *(code_t*)dst = code; + dst += 4; - sz = sizeof(instrDescJmp); - } + sz = sizeof(instrDescJmp); + } break; case INS_OPTS_J: - // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. - { - ssize_t imm = (ssize_t) id->idAddr()->iiaGetJmpOffset();//get jmp's offset relative delay-slot. - assert(!(imm & 3)); - - ins = id->idIns(); - code = emitInsCode(ins); - if (ins == INS_b || ins == INS_bl) - { - D_INST_B(code, imm); - } - else if (ins == INS_bnez || ins == INS_beqz) - { - D_INST_Bcond_Z(code, id->idReg1(), imm); - } - else if (ins == INS_bcnez || ins == INS_bceqz) - { - assert((code_t)(id->idReg1()) < 8);//cc - D_INST_Bcond_Z(code, id->idReg1(), imm); - } - else if ((INS_beq <= ins) && (ins <= INS_bgeu)) - { - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); - } - else + // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. { - assert(!"unimplemented on LOONGARCH yet"); - } - *(code_t *)dst = code; - dst += 4; + ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. 
+ assert(!(imm & 3)); - sz = sizeof(instrDescJmp); - } + ins = id->idIns(); + code = emitInsCode(ins); + if (ins == INS_b || ins == INS_bl) + { + D_INST_B(code, imm); + } + else if (ins == INS_bnez || ins == INS_beqz) + { + D_INST_Bcond_Z(code, id->idReg1(), imm); + } + else if (ins == INS_bcnez || ins == INS_bceqz) + { + assert((code_t)(id->idReg1()) < 8); // cc + D_INST_Bcond_Z(code, id->idReg1(), imm); + } + else if ((INS_beq <= ins) && (ins <= INS_bgeu)) + { + D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + } + else + { + assert(!"unimplemented on LOONGARCH yet"); + } + *(code_t*)dst = code; + dst += 4; + + sz = sizeof(instrDescJmp); + } break; case INS_OPTS_C: @@ -3546,14 +3577,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ins = INS_nop; break; - //case INS_OPTS_NONE: + // case INS_OPTS_NONE: default: - //assert(id->idGCref() == GCT_NONE); - *(code_t *)dst = id->idAddr()->iiaGetInstrEncode(); + // assert(id->idGCref() == GCT_NONE); + *(code_t*)dst = id->idAddr()->iiaGetInstrEncode(); dst += 4; dst2 = dst; - ins = id->idIns(); - sz = emitSizeOfInsDsc(id); + ins = id->idIns(); + sz = emitSizeOfInsDsc(id); break; } @@ -3573,7 +3604,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitGCregDeadUpd(id->idReg1(), dst2); } - //if (emitInsMayWriteMultipleRegs(id)) + // if (emitInsMayWriteMultipleRegs(id)) //{ // // INS_gslq etc... 
// // "idReg2" is the secondary destination register @@ -3617,7 +3648,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (vt == TYP_REF || vt == TYP_BYREF) emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum)); } - //if (emitInsWritesToLclVarStackLocPair(id)) + // if (emitInsWritesToLclVarStackLocPair(id)) //{ // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; // if (id->idGCrefReg2() != GCT_NONE) @@ -3647,18 +3678,18 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) #ifdef DEBUG /* Make sure we set the instruction descriptor size correctly */ - //size_t expected = emitSizeOfInsDsc(id); - //assert(sz == expected); + // size_t expected = emitSizeOfInsDsc(id); + // assert(sz == expected); if (emitComp->opts.disAsm || emitComp->verbose) { - code_t *cp = (code_t*) *dp; + code_t* cp = (code_t*)*dp; while ((BYTE*)cp != dst) { emitDisInsName(*cp, (BYTE*)cp, id); cp++; } - //emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + // emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); } if (emitComp->compDebugBreak) @@ -3691,7 +3722,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) * Display the given instruction. */ -//NOTE: At least 32bytes within dst. +// NOTE: At least 32bytes within dst. 
void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { const BYTE* insstrs = dst; @@ -3700,29 +3731,28 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { printf("LOONGARCH invalid instruction: 0x%x\n", code); assert(!"invalid inscode on LOONGARCH!"); - return ; + return; } -// clang-format off + // clang-format off const char * const regName[] = {"zero", "ra", "tp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "x0", "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"}; const char * const FregName[] = {"fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}; const char * const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; -// clang-format on - + // clang-format on - unsigned int opcode = (code>>26) & 0x3f; + unsigned int opcode = (code >> 26) & 0x3f; - //bits: 31-26,MSB6 + // bits: 31-26,MSB6 switch (opcode) { case 0x0: { - goto Label_OPCODE_0; - //break; + goto Label_OPCODE_0; + // break; } - //case 0x1: + // case 0x1: //{ // assert(!"unimplemented on loongarch yet!"); // //goto Label_OPCODE_1; @@ -3731,23 +3761,23 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x2: { goto Label_OPCODE_2; - //break; + // break; } case 0x3: { goto Label_OPCODE_3; - //break; + // break; } case 0xe: { goto Label_OPCODE_E; - //break; + // break; } - case LA_2RI16_ADDU16I_D: //0x4 + case LA_2RI16_ADDU16I_D: // 0x4 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si16 = (code >> 10) & 0xffff; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si16 = (code >> 10) & 0xffff; printf(" 0x%llx addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); 
return; } @@ -3755,10 +3785,10 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x6: case 0x7: { - //bits: 31-25,MSB7 + // bits: 31-25,MSB7 unsigned int inscode = (code >> 25) & 0x7f; - const char *rd = regName[code & 0x1f]; - unsigned int si20 = (code >> 5) & 0xfffff; + const char* rd = regName[code & 0x1f]; + unsigned int si20 = (code >> 5) & 0xfffff; switch (inscode) { case LA_1RI20_LU12I_W: @@ -3781,7 +3811,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) printf(" 0x%llx pcaddu18i %s, 0x%x\n", insstrs, rd, si20); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -3790,11 +3820,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x8: case 0x9: { - //bits: 31-24,MSB8 + // bits: 31-24,MSB8 unsigned int inscode = (code >> 24) & 0xff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si14 = ((code >> 10) & 0x3fff)<<2; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si14 = ((code >> 10) & 0x3fff) << 2; si14 >>= 2; switch (inscode) { @@ -3822,7 +3852,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_2RI14_STPTR_D: printf(" 0x%llx stptr.d %s, %s, %d\n", insstrs, rd, rj, si14); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -3830,12 +3860,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0xa: { - //bits: 31-24,MSB8 + // bits: 31-24,MSB8 unsigned int inscode = (code >> 22) & 0x3ff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *fd = FregName[code & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + short si12 = ((code >> 10) 
& 0xfff) << 4; si12 >>= 4; switch (inscode) { @@ -3887,56 +3917,59 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_2RI12_FST_D: printf(" 0x%llx fst.d %s, %s, %d\n", insstrs, fd, rj, si12); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_1RI21_BEQZ: //0x10 + case LA_1RI21_BEQZ: // 0x10 { - const char *rj = regName[(code>>5) & 0x1f]; - int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); return; } - case LA_1RI21_BNEZ: //0x11 + case LA_1RI21_BNEZ: // 0x11 { - const char *rj = regName[(code>>5) & 0x1f]; - int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16))<<11; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx bnez %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); return; } case 0x12: { - //LA_1RI21_BCEQZ - //LA_1RI21_BCNEZ - const char *cj = CFregName[(code>>5) & 0x7]; - int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; + // LA_1RI21_BCEQZ + // LA_1RI21_BCNEZ + const char* cj = CFregName[(code >> 5) & 0x7]; + int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; - if (0 == ((code>>8) & 0x3)) { + if (0 == ((code >> 8) & 0x3)) + { printf(" 0x%llx bceqz %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); return; } - else if (1 == ((code>>8) & 0x3)) { + else if (1 == ((code >> 8) & 0x3)) + { printf(" 0x%llx bcnez %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); return; } - else { + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_2RI16_JIRL: //0x13 + case LA_2RI16_JIRL: // 0x13 { - const char *rd = regName[code & 0x1f]; - 
const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - if(id->idDebugOnlyInfo()->idMemCookie) + if (id->idDebugOnlyInfo()->idMemCookie) { assert(0 < id->idDebugOnlyInfo()->idMemCookie); const char* methodName; @@ -3949,76 +3982,76 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } return; } - case LA_I26_B: //0x14 + case LA_I26_B: // 0x14 { - int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16))<<6; + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; printf(" 0x%llx b 0x%llx\n", insstrs, (int64_t)insstrs + offs26); return; } - case LA_I26_BL: //0x15 + case LA_I26_BL: // 0x15 { - int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16))<<6; + int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; printf(" 0x%llx bl 0x%llx\n", insstrs, (int64_t)insstrs + offs26); return; } - case LA_2RI16_BEQ: //0x16 + case LA_2RI16_BEQ: // 0x16 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BNE: //0x17 + case LA_2RI16_BNE: // 0x17 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BLT: //0x18 + case LA_2RI16_BLT: 
// 0x18 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BGE: //0x19 + case LA_2RI16_BGE: // 0x19 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BLTU: //0x1a + case LA_2RI16_BLTU: // 0x1a { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - case LA_2RI16_BGEU: //0x1b + case LA_2RI16_BGEU: // 0x1b { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - int offs16 = (short)((code >> 10) & 0xffff); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4026,27 +4059,27 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) Label_OPCODE_0: opcode = (code >> 22) & 0x3ff; - //bits: 31-22,MSB10 + // bits: 31-22,MSB10 switch 
(opcode) { case 0x0: { - //bits: 31-18,MSB14 + // bits: 31-18,MSB14 unsigned int inscode1 = (code >> 18) & 0x3fff; switch (inscode1) { case 0x0: { - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 unsigned int inscode2 = (code >> 15) & 0x1ffff; switch (inscode2) { case 0x0: { - //bits:31-10,MSB22 + // bits:31-10,MSB22 unsigned int inscode3 = (code >> 10) & 0x3fffff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; switch (inscode3) { case LA_2R_CLO_W: @@ -4122,7 +4155,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) printf(" 0x%llx cpucfg %s, %s\n", insstrs, rd, rj); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4130,19 +4163,19 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2R_ASRTLE_D: { - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); return; } case LA_2R_ASRTGT_D: { - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4150,39 +4183,44 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x1: { - //LA_OP_ALSL_W - //LA_OP_ALSL_WU - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa2 = (code>>15) & 0x3; - if (0 == ((code>>17) & 0x1)) { - printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + 
// LA_OP_ALSL_W + // LA_OP_ALSL_WU + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + if (0 == ((code >> 17) & 0x1)) + { + printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; - } else if (1 == ((code>>17) & 0x1)) { - printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + } + else if (1 == ((code >> 17) & 0x1)) + { + printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_OP_BYTEPICK_W: //0x2 + case LA_OP_BYTEPICK_W: // 0x2 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa2 = (code>>15) & 0x3; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); return; } - case LA_OP_BYTEPICK_D: //0x3 + case LA_OP_BYTEPICK_D: // 0x3 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa3 = (code>>15) & 0x7; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa3 = (code >> 15) & 0x7; printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); return; } @@ -4193,11 +4231,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x8: case 0x9: { - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 unsigned int inscode2 = (code >> 15) & 0x1ffff; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char 
*rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; switch (inscode2) { @@ -4339,15 +4377,15 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_3R_CRCC_W_D_W: printf(" 0x%llx crcc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } case 0xa: { - //bits: 31-15,MSB17 - unsigned int inscode2 = (code >> 15) & 0x1ffff; + // bits: 31-15,MSB17 + unsigned int inscode2 = (code >> 15) & 0x1ffff; unsigned int codefield = code & 0x7fff; switch (inscode2) { @@ -4360,21 +4398,21 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_OP_SYSCALL: printf(" 0x%llx syscall 0x%x\n", insstrs, codefield); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } - case LA_OP_ALSL_D: //0xb + case LA_OP_ALSL_D: // 0xb { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; - unsigned int sa2 = (code>>15) & 0x3; - printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2+1)); + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; + unsigned int sa2 = (code >> 15) & 0x3; + printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4382,44 +4420,56 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x1: { - if (code & 0x200000) { - //LA_OP_BSTRINS_W - //LA_OP_BSTRPICK_W - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + if (code & 0x200000) + { + // LA_OP_BSTRINS_W + // LA_OP_BSTRPICK_W + const char* rd = regName[code & 
0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int lsbw = (code >> 10) & 0x1f; unsigned int msbw = (code >> 16) & 0x1f; - if (!(code & 0x8000)) { + if (!(code & 0x8000)) + { printf(" 0x%llx bstrins.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); return; - } else if (code & 0x8000) { + } + else if (code & 0x8000) + { printf(" 0x%llx bstrpick.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } - else { - //bits: 31-18,MSB14 + else + { + // bits: 31-18,MSB14 unsigned int inscode1 = (code >> 18) & 0x3fff; switch (inscode1) { case 0x10: { - //LA_OP_SLLI_W: - //LA_OP_SLLI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_SLLI_W: + // LA_OP_SLLI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx slli.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx slli.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4427,19 +4477,24 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x11: { - //LA_OP_SRLI_W: - //LA_OP_SRLI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_SRLI_W: + // LA_OP_SRLI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx 
srli.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx srli.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4447,19 +4502,24 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x12: { - //LA_OP_SRAI_W: - //LA_OP_SRAI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_SRAI_W: + // LA_OP_SRAI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx srai.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) & 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx srai.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4467,36 +4527,41 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x13: { - //LA_OP_ROTRI_W: - //LA_OP_ROTRI_D: - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - if (1 == ((code>>15) & 0x7)) { - unsigned int ui5 = (code>>10) & 0x1f; + // LA_OP_ROTRI_W: + // LA_OP_ROTRI_D: + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + if (1 == ((code >> 15) & 0x7)) + { + unsigned int ui5 = (code >> 10) & 0x1f; printf(" 0x%llx rotri.w %s, %s, %d\n", insstrs, rd, rj, ui5); return; - } else if (1 == ((code>>16) & 0x3)) { - unsigned int ui6 = (code>>10) & 0x3f; + } + else if (1 == ((code >> 16) 
& 0x3)) + { + unsigned int ui6 = (code >> 10) & 0x3f; printf(" 0x%llx rotri.d %s, %s, %d\n", insstrs, rd, rj, ui6); return; - } else { + } + else + { printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; - } + } return; } case LA_OP_BSTRINS_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4504,8 +4569,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BSTRPICK_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4513,13 +4578,13 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case 0x4: { - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 unsigned int inscode1 = (code >> 15) & 0x1ffff; - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; switch (inscode1) { @@ -4592,7 +4657,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x23a: case 0x23c: { - //bits:31-10,MSB22 + // 
bits:31-10,MSB22 unsigned int inscode2 = (code >> 10) & 0x3fffff; switch (inscode2) { @@ -4670,25 +4735,25 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; case LA_2R_MOVFR2CF: { - const char *cd = CFregName[code & 0x7]; + const char* cd = CFregName[code & 0x7]; printf(" 0x%llx movfr2cf %s, %s\n", insstrs, cd, fj); return; } case LA_2R_MOVCF2FR: { - const char *cj = CFregName[(code>>5) & 0x7]; + const char* cj = CFregName[(code >> 5) & 0x7]; printf(" 0x%llx movcf2fr %s, %s\n", insstrs, fd, cj); return; } case LA_2R_MOVGR2CF: { - const char *cd = CFregName[code & 0x7]; + const char* cd = CFregName[code & 0x7]; printf(" 0x%llx movgr2cf %s, %s\n", insstrs, cd, rj); return; } case LA_2R_MOVCF2GR: { - const char *cj = CFregName[(code>>5) & 0x7]; + const char* cj = CFregName[(code >> 5) & 0x7]; printf(" 0x%llx movcf2gr %s, %s\n", insstrs, rd, cj); return; } @@ -4776,177 +4841,176 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case LA_2R_FRINT_D: printf(" 0x%llx frint.d %s, %s\n", insstrs, fd, fj); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } return; } - case LA_2RI12_SLTI: //0x8 + case LA_2RI12_SLTI: // 0x8 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_SLTUI: //0x9 + case LA_2RI12_SLTUI: // 0x9 { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 
0xfff) << 4; si12 >>= 4; printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_ADDI_W: //0xa + case LA_2RI12_ADDI_W: // 0xa { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_ADDI_D: //0xb + case LA_2RI12_ADDI_D: // 0xb { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - short si12 = ((code >> 10) & 0xfff)<<4; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_LU52I_D: //0xc + case LA_2RI12_LU52I_D: // 0xc { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int si12 = (code >> 10) & 0xfff; printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); return; } - case LA_2RI12_ANDI: //0xd + case LA_2RI12_ANDI: // 0xd { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } - case LA_2RI12_ORI: //0xe + case LA_2RI12_ORI: // 0xe { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } - case LA_2RI12_XORI: //0xf 
+ case LA_2RI12_XORI: // 0xf { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } -//Label_OPCODE_1: +// Label_OPCODE_1: // opcode = (code >> 24) & 0xff; // //bits: 31-24,MSB8 - Label_OPCODE_2: opcode = (code >> 20) & 0xfff; - //bits: 31-20,MSB12 + // bits: 31-20,MSB12 switch (opcode) { case LA_4R_FMADD_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMADD_D: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmsub.s %s, 
%s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_D: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMADD_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMADD_D: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_S: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_D: { - const 
char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *fa = FregName[(code>>15) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fa = FregName[(code >> 15) & 0x1f]; printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -4954,16 +5018,16 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) Label_OPCODE_3: opcode = (code >> 20) & 0xfff; - //bits: 31-20,MSB12 + // bits: 31-20,MSB12 switch (opcode) { case LA_OP_FCMP_cond_S: { - //bits:19-15,cond + // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; - const char *cd = CFregName[code & 0x7]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; + const char* cd = CFregName[code & 0x7]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; switch (cond) { case 0x0: @@ -5032,18 +5096,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x19: printf(" 0x%llx fcmp.sune.s %s, %s, %s\n", insstrs, cd, fj, fk); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } case LA_OP_FCMP_cond_D: { - //bits:19-15,cond + // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; - const char *cd = CFregName[code & 0x7]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; + const char* cd = CFregName[code & 0x7]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; switch (cond) { case 0x0: @@ -5112,21 +5176,21 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x19: printf(" 0x%llx fcmp.sune.d %s, %s, %s\n", 
insstrs, cd, fj, fk); return; - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } } case LA_4R_FSEL: { - const char *fd = FregName[code & 0x1f]; - const char *fj = FregName[(code>>5) & 0x1f]; - const char *fk = FregName[(code>>10) & 0x1f]; - const char *ca = CFregName[(code>>15) & 0x7]; + const char* fd = FregName[code & 0x1f]; + const char* fj = FregName[(code >> 5) & 0x1f]; + const char* fk = FregName[(code >> 10) & 0x1f]; + const char* ca = CFregName[(code >> 15) & 0x7]; printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -5134,94 +5198,94 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) Label_OPCODE_E: opcode = (code >> 15) & 0x1ffff; - //bits: 31-15,MSB17 + // bits: 31-15,MSB17 switch (opcode) { case LA_3R_LDX_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_D: { - 
const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_BU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = 
regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_HU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5230,321 +5294,321 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; case LA_3R_FLDX_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDX_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 
0x1f]; printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_AMSWAP_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = 
regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* 
rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DU: { - const char *rd = regName[code & 0x1f]; - 
const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = 
regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); 
return; } case LA_3R_AMXOR_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = 
regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_DU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_WU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_DU: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5562,197 +5626,197 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_3R_FLDGT_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const 
char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDGT_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.d %s, %s, 
%s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_S: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_D: { - const char *fd = FregName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* fd = FregName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_LDGT_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = 
regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx 
stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_B: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_H: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_W: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = 
regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_D: { - const char *rd = regName[code & 0x1f]; - const char *rj = regName[(code>>5) & 0x1f]; - const char *rk = regName[(code>>10) & 0x1f]; + const char* rd = regName[code & 0x1f]; + const char* rj = regName[(code >> 5) & 0x1f]; + const char* rk = regName[(code >> 10) & 0x1f]; printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } - default : + default: printf("LOONGARCH illegal instruction: 0x%08x\n", code); return; } @@ -5782,7 +5846,7 @@ void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) void emitter::emitDispIns( instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) -{//not used on loongarch64. +{ // not used on loongarch64. printf("------------not implements emitDispIns() for loongarch64!!!\n"); } @@ -5845,8 +5909,8 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR { assert(addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA)); - int offset = 0; - DWORD lsl = 0; + int offset = 0; + DWORD lsl = 0; if (addr->OperGet() == GT_LEA) { @@ -5891,7 +5955,8 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else // large offset { // First load/store tmpReg with the large offset constant - emitIns_I_la(EA_PTRSIZE, tmpReg, offset);//codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + emitIns_I_la(EA_PTRSIZE, tmpReg, + offset); // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); // Then add the base register // rd = rd + base emitIns_R_R_R(INS_add_d, addType, tmpReg, tmpReg, memBase->GetRegNum()); @@ -5910,63 +5975,65 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // Then load/store dataReg from/to 
[memBase + index] switch (EA_SIZE(emitTypeSize(indir->TypeGet()))) { - case EA_1BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); - if (ins <= INS_ld_wu) - { - if (varTypeIsUnsigned(indir->TypeGet())) - ins = INS_ldx_bu; + case EA_1BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_bu; + else + ins = INS_ldx_b; + } else - ins = INS_ldx_b; - } - else - ins = INS_stx_b; - break; - case EA_2BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); - if (ins <= INS_ld_wu) - { - if (varTypeIsUnsigned(indir->TypeGet())) - ins = INS_ldx_hu; + ins = INS_stx_b; + break; + case EA_2BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b))); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_hu; + else + ins = INS_ldx_h; + } else - ins = INS_ldx_h; - } - else - ins = INS_stx_h; - break; - case EA_4BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_s) || (ins == INS_fld_s)); - assert(INS_fst_s > INS_st_d); - if (ins <= INS_ld_wu) - { - if (varTypeIsUnsigned(indir->TypeGet())) - ins = INS_ldx_wu; + ins = INS_stx_h; + break; + case EA_4BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_s) || (ins == INS_fld_s)); + assert(INS_fst_s > INS_st_d); + if (ins <= INS_ld_wu) + { + if (varTypeIsUnsigned(indir->TypeGet())) + ins = INS_ldx_wu; + else + ins = INS_ldx_w; + } + else if (ins == INS_fld_s) + ins = INS_fldx_s; + else if (ins == INS_fst_s) + ins = INS_fstx_s; else - ins = INS_ldx_w; - } - else if (ins == INS_fld_s) - ins = INS_fldx_s; - else if (ins == INS_fst_s) - ins = INS_fstx_s; - else - ins = INS_stx_w; - break; - case 
EA_8BYTE: - assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || (ins == INS_fst_d) || (ins == INS_fld_d)); - assert(INS_fst_d > INS_st_d); - if (ins <= INS_ld_wu) - { - ins = INS_ldx_d; - } - else if (ins == INS_fld_d) - ins = INS_fldx_d; - else if (ins == INS_fst_d) - ins = INS_fstx_d; - else - ins = INS_stx_d; - break; - default: - assert(!"------------TODO for LOONGARCH64: unsupported ins."); + ins = INS_stx_w; + break; + case EA_8BYTE: + assert(((ins <= INS_ld_wu) && (ins >= INS_ld_b)) || ((ins <= INS_st_d) && (ins >= INS_st_b)) || + (ins == INS_fst_d) || (ins == INS_fld_d)); + assert(INS_fst_d > INS_st_d); + if (ins <= INS_ld_wu) + { + ins = INS_ldx_d; + } + else if (ins == INS_fld_d) + ins = INS_fldx_d; + else if (ins == INS_fst_d) + ins = INS_fstx_d; + else + ins = INS_stx_d; + break; + default: + assert(!"------------TODO for LOONGARCH64: unsupported ins."); } if (lsl > 0) @@ -6015,7 +6082,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // First load/store tmpReg with the large offset constant emitIns_I_la(EA_PTRSIZE, tmpReg, offset); - //codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); // Then load/store dataReg from/to [memBase + tmpReg] emitIns_R_R_R(INS_add_d, addType, tmpReg, memBase->GetRegNum(), tmpReg); @@ -6026,17 +6093,17 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else // addr is not contained, so we evaluate it into a register { #ifdef DEBUG - if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) - { - // If the local var is a gcref or byref, the local var better be untracked, because we have - // no logic here to track local variable lifetime changes, like we do in the contained case - // above. 
E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local - // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. - GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); - unsigned lclNum = varNode->GetLclNum(); - LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); - assert(!varDsc->lvTracked); - } + if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + LclVarDsc* varDsc = emitComp->lvaGetDesc(lclNum); + assert(!varDsc->lvTracked); + } #endif // DEBUG // Then load/store dataReg from/to [addrReg] emitIns_R_R_I(ins, attr, dataReg, addr->GetRegNum(), 0); @@ -6111,7 +6178,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { assert(attr == EA_8BYTE); } - else if (ins == INS_add_w)// || ins == INS_add + else if (ins == INS_add_w) // || ins == INS_add { assert(attr == EA_4BYTE); } @@ -6134,13 +6201,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else if ((ins == INS_mul_d) || (ins == INS_mulh_d) || (ins == INS_mulh_du)) { assert(attr == EA_8BYTE); - //NOTE: overflow format doesn't support an int constant operand directly. + // NOTE: overflow format doesn't support an int constant operand directly. 
assert(intConst == nullptr); } - else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || (ins == INS_mulw_d_wu)) + else if ((ins == INS_mul_w) || (ins == INS_mulw_d_w) || (ins == INS_mulh_w) || (ins == INS_mulh_wu) || + (ins == INS_mulw_d_wu)) { assert(attr == EA_4BYTE); - //NOTE: overflow format doesn't support an int constant operand directly. + // NOTE: overflow format doesn't support an int constant operand directly. assert(intConst == nullptr); } else @@ -6153,10 +6221,10 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } if (intConst != nullptr) - {//should re-design this case!!! ---2020.04.11. + { // should re-design this case!!! ---2020.04.11. ssize_t imm = intConst->IconValue(); if (ins == INS_andi || ins == INS_ori || ins == INS_xori) - //assert((0 <= imm) && (imm <= 0xfff)); + // assert((0 <= imm) && (imm <= 0xfff)); assert((-2048 <= imm) && (imm <= 0xfff)); else assert((-2049 < imm) && (imm < 2048)); @@ -6300,7 +6368,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (attr == EA_4BYTE) emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); - //else + // else //{ // assert(!"unimplemented on LOONGARCH yet: ulong * ulong !!!"); //} @@ -6322,7 +6390,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(REG_RA != src1->GetRegNum()); assert(REG_RA != src2->GetRegNum()); size_t imm = (EA_SIZE(attr) == EA_8BYTE) ? 63 : 31; - emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), imm); + emitIns_R_R_I(EA_SIZE(attr) == EA_8BYTE ? 
INS_srai_d : INS_srai_w, attr, REG_RA, dst->GetRegNum(), + imm); codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bne, REG_R21, nullptr, REG_RA); } } @@ -6332,14 +6401,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - //NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. + // NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. if (EA_SIZE(attr) == EA_4BYTE) emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); } else { - regNumber regOp1 = src1->GetRegNum(); - regNumber regOp2 = src2->GetRegNum(); + regNumber regOp1 = src1->GetRegNum(); + regNumber regOp2 = src2->GetRegNum(); regNumber saveOperReg1 = REG_NA; regNumber saveOperReg2 = REG_NA; @@ -6350,14 +6419,14 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(REG_R21 != regOp1); assert(REG_RA != regOp1); emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp1, /*src1->GetRegNum(),*/ 31, 0); - regOp1 = REG_RA;//dst->ExtractTempReg(); + regOp1 = REG_RA; // dst->ExtractTempReg(); } if (src2->gtType == TYP_INT) { assert(REG_R21 != regOp2); assert(REG_RA != regOp2); emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_R21, regOp2, /*src2->GetRegNum(),*/ 31, 0); - regOp2 = REG_R21;//dst->ExtractTempReg(); + regOp2 = REG_R21; // dst->ExtractTempReg(); } } if (needCheckOv) @@ -6396,7 +6465,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (dst->OperGet() == GT_ADD || dst->OperGet() == GT_SUB) { - ssize_t imm; + ssize_t imm; regNumber tempReg1; regNumber tempReg2; // ADD : A = B + C @@ -6439,7 +6508,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm); } // if (B > 0 && C < 0) || (B < 0 && C > 0), skip overflow - BasicBlock* tmpLabel = 
codeGen->genCreateTempLabel(); + BasicBlock* tmpLabel = codeGen->genCreateTempLabel(); BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel(); BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel(); @@ -6448,7 +6517,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0); // B > 0 and C > 0, if A < B, goto overflow - emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); + emitIns_J_cond_la(INS_bge, tmpLabel, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1, + dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2); codeGen->genDefineTempLabel(tmpLabel2); @@ -6457,7 +6527,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, codeGen->genDefineTempLabel(tmpLabel3); // B < 0 and C < 0, if A > B, goto overflow - emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); + emitIns_J_cond_la(INS_blt, tmpLabel2, dst->OperGet() == GT_ADD ? saveOperReg1 : saveOperReg2, + dst->OperGet() == GT_ADD ? dst->GetRegNum() : saveOperReg1); codeGen->genDefineTempLabel(tmpLabel); } @@ -6477,7 +6548,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, return dst->GetRegNum(); } -unsigned emitter::get_curTotalCodeSize() +unsigned emitter::get_curTotalCodeSize() { return emitTotalCodeSize; } @@ -6501,9 +6572,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins { insExecutionCharacteristics result; - //TODO: support this function for LoongArch64. - result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; - result.insLatency = PERFSCORE_LATENCY_ZERO; + // TODO: support this function for LoongArch64. 
+ result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; return result; @@ -6544,7 +6615,7 @@ const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) // bool emitter::IsMovInstruction(instruction ins) { - switch (ins) + switch (ins) { case INS_mov: case INS_fmov_s: diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 50da1b09a0f20..e9cc1e9d831d7 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -31,7 +31,7 @@ struct CnsVal const char* emitFPregName(unsigned reg, bool varName = true); const char* emitVectorRegName(regNumber reg); -//NOTE: At least 32bytes within dst. +// NOTE: At least 32bytes within dst. void emitDisInsName(code_t code, const BYTE* dst, instrDesc* id); #endif // DEBUG @@ -47,16 +47,14 @@ instrDesc* emitNewInstrCallDir(int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); instrDesc* emitNewInstrCallInd(int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize)); /************************************************************************/ /* Private helpers for instruction output */ @@ -75,12 +73,13 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr // Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer unsigned emitOutput_Instr(BYTE* dst, code_t code); -//NOTEADD: New functions in emitarm64.h +// NOTEADD: New functions in emitarm64.h // Method to do check if mov is redundant with respect to the last instruction. 
// If yes, the caller of this method can choose to omit current mov instruction. static bool IsMovInstruction(instruction ins); bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip); -bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);//New functions end. +bool IsRedundantLdStr( + instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. /************************************************************************ * @@ -104,9 +103,9 @@ union bitMaskImm { * representation imm(i16,hw) */ -//static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); +// static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); -//static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); +// static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); /************************************************************************ * @@ -129,9 +128,9 @@ union halfwordImm { * representation imm(i16,hw) */ -//static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); +// static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); -//static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); +// static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); /************************************************************************ * @@ -155,9 +154,9 @@ union byteShiftedImm { * representation imm(i8,by) */ -//static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); +// static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); -//static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); +// static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, 
emitAttr size); /************************************************************************ * @@ -180,9 +179,9 @@ union floatImm8 { * Convert between a double and its 'float 8-bit immediate' representation */ -//static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); +// static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); -//static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); +// static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); /************************************************************************ * @@ -193,9 +192,9 @@ union floatImm8 { union condFlagsImm { struct { - //insCond cond : 4; // bits 0..3 - //insCflags flags : 4; // bits 4..7 - unsigned imm5 : 5; // bits 8..12 + // insCond cond : 4; // bits 0..3 + // insCflags flags : 4; // bits 4..7 + unsigned imm5 : 5; // bits 8..12 }; unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate }; @@ -209,19 +208,19 @@ static bool isIntegerRegister(regNumber reg) // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { - return -( ((int)1) << 11 ) <= value && value < ( ((int)1) << 11 ); + return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; // Returns true if 'value' is a legal signed immediate 16 bit encoding. static bool isValidSimm16(ssize_t value) { - return -( ((int)1) << 15 ) <= value && value < ( ((int)1) << 15 ); + return -(((int)1) << 15) <= value && value < (((int)1) << 15); }; // Returns true if 'value' is a legal signed immediate 20 bit encoding. 
static bool isValidSimm20(ssize_t value) { - return -( ((int)1) << 19 ) <= value && value < ( ((int)1) << 19 ); + return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; /************************************************************************/ @@ -229,7 +228,6 @@ static bool isValidSimm20(ssize_t value) /************************************************************************/ public: - // Returns the number of bits used by the given 'size'. inline static unsigned getBitWidth(emitAttr size) { @@ -273,7 +271,7 @@ void emitIns_R(instruction ins, emitAttr attr, regNumber reg); void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); -//NOTEADD: NEW function in emitarm64. +// NOTEADD: NEW function in emitarm64. void emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); @@ -310,13 +308,13 @@ void emitIns_R_R_R_Ext(instruction ins, insOpts opt = INS_OPTS_NONE, int shiftAmount = -1); -//NODECHANGE: ADD an arg. +// NODECHANGE: ADD an arg. 
void emitIns_R_R_I_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); -//void emitIns_BARR(instruction ins, insBarrier barrier); +// void emitIns_BARR(instruction ins, insBarrier barrier); void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); @@ -325,7 +323,7 @@ void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); -//void emitIns_R_R_S( +// void emitIns_R_R_S( // instruction ins, emitAttr attr, regNumber ireg, regNumber ireg2, int sa); void emitIns_R_R_S_S( @@ -352,13 +350,12 @@ void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int of void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); -//NODECHANGE: ADD a description of arguments "disp" +// NODECHANGE: ADD a description of arguments "disp" void emitIns_R_AI(instruction ins, emitAttr attr, regNumber reg, ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); - void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); @@ -380,7 +377,7 @@ enum EmitCallType EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method - //EC_FUNC_ADDR, // Direct call to an absolute address + // EC_FUNC_ADDR, // Direct call to an absolute address // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable) EC_INDIR_R, // Indirect call via register @@ -394,28 +391,27 @@ enum EmitCallType void emitIns_Call(EmitCallType callType, 
CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + void* addr, + ssize_t argSize, + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), VARSET_VALARG_TP ptrVars, regMaskTP gcrefRegs, regMaskTP byrefRegs, const DebugInfo& di, - regNumber ireg = REG_NA, - regNumber xreg = REG_NA, - unsigned xmul = 0, - ssize_t disp = 0, - bool isJump = false); + regNumber ireg = REG_NA, + regNumber xreg = REG_NA, + unsigned xmul = 0, + ssize_t disp = 0, + bool isJump = false); unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); -//BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); -//BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); -//BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); -//BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); -//BYTE* emitOutputShortConstant( +// BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); +// BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); +// BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); +// BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); +// BYTE* emitOutputShortConstant( // BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize); -unsigned get_curTotalCodeSize(); // bytes of code +unsigned get_curTotalCodeSize(); // bytes of code #endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9cfd25bbfcc42..7d9446d38a2e1 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3463,10 +3463,10 @@ 
unsigned Compiler::gtSetEvalOrder(GenTree* tree) case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: - // TODO: workround, should amend for LoongArch64. + // TODO: workround, should amend for LoongArch64. costEx = 4; costSz = 4; - goto COMMON_CNS; + goto COMMON_CNS; #else case GT_CNS_STR: case GT_CNS_LNG: @@ -21498,28 +21498,28 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); DWORD numFloatFields = comp->info.compCompHnd->getFieldTypeByHnd(retClsHnd); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); if (numFloatFields & 0x8) { assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else if (numFloatFields & 0x2) { assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; } else if (numFloatFields & 0x4) { assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & 0x20 ? 
TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else @@ -21728,10 +21728,10 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const } else { - noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs + noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs if (varTypeIsIntegralOrI(regType)) resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // V0 or V1 - else //if (!varTypeIsIntegralOrI(regType)) + else // if (!varTypeIsIntegralOrI(regType)) resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f06e2be8c5a7d..ea0529989c056 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11338,11 +11338,12 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr if (genActualType(op1->TypeGet()) != TYP_I_IMPL) { - // insert an explicit upcast +// insert an explicit upcast #ifdef TARGET_LOONGARCH64 if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) { - op1->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; + op1->AsIntCon()->gtIconVal = + fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; } else if (op1->gtOper == GT_CNS_INT) @@ -11353,11 +11354,12 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr } else if (genActualType(op2->TypeGet()) != TYP_I_IMPL) { - // insert an explicit upcast +// insert an explicit upcast #ifdef TARGET_LOONGARCH64 if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) { - op2->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; + op2->AsIntCon()->gtIconVal = + fUnsigned ? 
(uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; op2->gtType = TYP_LONG; } else @@ -12425,7 +12427,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) else op1 = impImplicitIorI4Cast(op1, lclTyp); #else - op1 = impImplicitIorI4Cast(op1, lclTyp); + op1 = impImplicitIorI4Cast(op1, lclTyp); #endif #ifdef TARGET_64BIT @@ -13514,12 +13516,13 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (op2->gtOper == GT_CNS_INT && op2->AsIntCon()->gtIconVal > 31) { type = TYP_LONG; - } else + } + else type = genActualType(op1->TypeGet()); #else type = genActualType(op1->TypeGet()); #endif - op1 = gtNewOperNode(oper, type, op1, op2); + op1 = gtNewOperNode(oper, type, op1, op2); impPushOnStack(op1, tiRetVal); break; @@ -13752,24 +13755,26 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_LOONGARCH64 if (op2->gtOper == GT_CNS_INT) { - op2->AsIntCon()->gtIconVal = uns ? (uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; + op2->AsIntCon()->gtIconVal = + uns ? (uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; op2->gtType = TYP_LONG; } else #endif - op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); + op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); } else if (varTypeIsI(op2) && (genActualType(op1) == TYP_INT)) { #ifdef TARGET_LOONGARCH64 if (op1->gtOper == GT_CNS_INT) { - op1->AsIntCon()->gtIconVal = uns ? (uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; + op1->AsIntCon()->gtIconVal = + uns ? 
(uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; } else #endif - op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); + op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -13863,16 +13868,16 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_64BIT #ifdef TARGET_LOONGARCH64 - if ((op2->OperGet() == GT_CNS_INT)/* && (op2->AsIntCon()->IconValue() == 0)*/) + if ((op2->OperGet() == GT_CNS_INT) /* && (op2->AsIntCon()->IconValue() == 0)*/) { op2->gtType = op1->TypeGet(); } - /*if (op1->OperGet() == GT_CNS_INT) - { - //assert(op1->gtType == op2->TypeGet()); - //op2->gtType = op1->TypeGet(); - op1->gtFlags |= GTF_CONTAINED; - }*/ +/*if (op1->OperGet() == GT_CNS_INT) +{ + //assert(op1->gtType == op2->TypeGet()); + //op2->gtType = op1->TypeGet(); + op1->gtFlags |= GTF_CONTAINED; +}*/ #else if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT)) { @@ -14180,7 +14185,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); op1->gtType = genActualType(lclTyp); impPushOnStack(op1, tiRetVal); - //opcode = CEE_LDC_I4_0; + // opcode = CEE_LDC_I4_0; break; } #endif @@ -14193,37 +14198,38 @@ void Compiler::impImportBlockCode(BasicBlock* block) uns = false; } - // At this point uns, ovf, callNode are all set. +// At this point uns, ovf, callNode are all set. 
#ifdef TARGET_LOONGARCH64 - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) + { + switch (lclTyp) { - switch (lclTyp) - { - case TYP_BYTE: - op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_UBYTE: - op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_USHORT: - op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_SHORT: - op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; - break; - default: - assert(!"unexpected type"); - return; - } + case TYP_BYTE: + op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_UBYTE: + op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_USHORT: + op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; + break; + case TYP_SHORT: + op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; + break; + default: + assert(!"unexpected type"); + return; + } - op1->gtType == TYP_INT; + op1->gtType == TYP_INT; - impPushOnStack(op1, tiRetVal); - break; - } else + impPushOnStack(op1, tiRetVal); + break; + } + else #endif - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) { op2 = op1->AsOp()->gtOp2; @@ -14288,7 +14294,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } else #ifdef TARGET_LOONGARCH64 - if (type != TYP_LONG) + if (type != TYP_LONG) { if (!ovfl && op1->gtOper == GT_CNS_INT && op1->TypeGet() == TYP_LONG) { @@ -14296,12 +14302,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (lclTyp == TYP_INT) { op1->AsIntCon()->gtIconVal = (int32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_INT; + op1->gtType = TYP_INT; } else if (lclTyp == TYP_UINT) { op1->AsIntCon()->gtIconVal = (uint32_t)op1->AsIntCon()->gtIconVal; - 
op1->gtType = TYP_UINT; + op1->gtType = TYP_UINT; } else op1 = gtNewCastNode(type, op1, uns, lclTyp); @@ -16135,7 +16141,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } else #ifdef TARGET_LOONGARCH64 - if (genActualType(op2->TypeGet()) != TYP_INT) + if (genActualType(op2->TypeGet()) != TYP_INT) #endif { bool isUnsigned = false; @@ -17728,7 +17734,7 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) impBashVarAddrsToI(op2); op2 = impImplicitIorI4Cast(op2, info.compRetType); op2 = impImplicitR4orR8Cast(op2, info.compRetType); - // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. +// Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. #ifdef TARGET_LOONGARCH64 assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || @@ -18608,17 +18614,17 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_I_IMPL) { - // Spill clique has decided this should be "native int", but this block only pushes an "int". - // Insert a sign-extension to "native int" so we match the clique. +// Spill clique has decided this should be "native int", but this block only pushes an "int". +// Insert a sign-extension to "native int" so we match the clique. 
#ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT/* && !tree->AsIntCon()->gtIconVal*/) + if (tree->gtOper == GT_CNS_INT /* && !tree->AsIntCon()->gtIconVal*/) { tree->gtType = TYP_I_IMPL; tree->SetContained(); } else #endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } // Consider the case where one branch left a 'byref' on the stack and the other leaves @@ -18640,8 +18646,8 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_BYREF) { - // Spill clique has decided this should be "byref", but this block only pushes an "int". - // Insert a sign-extension to "native int" so we match the clique size. +// Spill clique has decided this should be "byref", but this block only pushes an "int". +// Insert a sign-extension to "native int" so we match the clique size. #ifdef TARGET_LOONGARCH64 if (tree->gtOper == GT_CNS_INT /*&& !tree->AsIntCon()->gtIconVal*/) { @@ -18650,7 +18656,7 @@ void Compiler::impImportBlock(BasicBlock* block) } else #endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } } #endif // TARGET_64BIT diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 62b2eade19cec..e80855be507dc 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -900,13 +900,13 @@ void CodeGen::inst_RV_TT(instruction ins, regSet.verifyRegUsed(regTmp); return; } -#else // !TARGET_ARM +#else // !TARGET_ARM #ifdef TARGET_LOONGARCH64 if (emitter::isFloatReg(reg)) - assert((ins==INS_fld_d) || (ins==INS_fld_s)); + assert((ins == INS_fld_d) || (ins == INS_fld_s)); else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) - {//TODO should amend for LOONGARCH64 !!! 
- //assert((ins==INS_ld_d) || (ins==INS_ld_w)); + { // TODO should amend for LOONGARCH64 !!! + // assert((ins==INS_ld_d) || (ins==INS_ld_w)); ins = size == EA_4BYTE ? INS_ld_w : INS_ld_d; } #endif @@ -1476,7 +1476,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) #if defined(TARGET_LOONGARCH64) bool CodeGenInterface::validImmForBAL(ssize_t addr) -{//TODO: can amend/optimize for LoongArch64. +{ // TODO: can amend/optimize for LoongArch64. return false; } #endif // TARGET_LOONGARCH64 @@ -1778,8 +1778,8 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } else { - //assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); - ins = INS_ld_d;//default ld_d. + // assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); + ins = INS_ld_d; // default ld_d. } #else NYI("ins_Load"); @@ -2005,8 +2005,8 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_st_h; else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) ins = INS_st_w; - else //if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) - ins = INS_st_d;//default st_d. + else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + ins = INS_st_d; // default st_d. 
#else NYI("ins_Store"); #endif diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index 1c16d53fd453a..f171bc69a7a68 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -492,7 +492,6 @@ INSTS(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) INSTS(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) INSTS(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) - // clang-format on /*****************************************************************************/ #undef INSTS diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 987ea401c503a..96b9f15c279a0 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -309,7 +309,7 @@ // Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from // the official Arm64 ABI. // Case: splitting 16 byte struct between x7 and stack -#if defined(TARGET_ARM) || defined(TARGET_ARM64)/* || defined(TARGET_LOONGARCH64)*/ +#if defined(TARGET_ARM) || defined(TARGET_ARM64) /* || defined(TARGET_LOONGARCH64)*/ #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 2dff5054a8cc8..ce652f10e91fe 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -610,7 +610,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) // On System V type environment the float registers are not indexed together with the int ones. 
- varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; + varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; #endif // TARGET* CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args; @@ -644,12 +644,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un CORINFO_CLASS_HANDLE typeHnd = nullptr; #if defined(TARGET_LOONGARCH64) - int flags = 0; - CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); + int flags = 0; + CorInfoTypeWithMod corInfoType = + info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); #else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); #endif - varDsc->lvIsParam = 1; + varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -904,19 +905,19 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un assert(varDsc->lvExactSize <= argSize); float_num = 1; - arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); } else if (flags & 0x8) { - arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; - float_num = 2; + arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (flags & 0x20) ? 
TYP_DOUBLE : TYP_FLOAT; + float_num = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); } else if (flags & 2) { - float_num = 1; + float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); @@ -925,7 +926,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else if (flags & 4) { - float_num = 1; + float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); @@ -937,8 +938,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { assert(float_num > 0); canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - arg1_Type = TYP_UNKNOWN; - arg2_Type = TYP_UNKNOWN; + arg1_Type = TYP_UNKNOWN; + arg2_Type = TYP_UNKNOWN; } } else @@ -949,12 +950,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (!canPassArgInRegisters && varTypeIsFloating(argType)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); - argType = canPassArgInRegisters ? TYP_I_IMPL : argType; + argType = canPassArgInRegisters ? TYP_I_IMPL : argType; } if (!canPassArgInRegisters && (cSlots > 1)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + arg1_Type = canPassArgInRegisters ? 
TYP_I_IMPL : TYP_UNKNOWN; } #endif } @@ -986,7 +987,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } } else -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) if (arg1_Type != TYP_UNKNOWN) { firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1_Type, 1); @@ -1051,13 +1052,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2_Type, 1); varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2_Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 1 : 0; + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 1 : 0; varDscInfo->hasMultiSlotStruct = true; } else if (cSlots > 1) { varDsc->lvIsSplit = 1; - //varDsc->lvFldOffset = 0; + // varDsc->lvFldOffset = 0; varDsc->SetOtherArgReg(REG_STK); varDscInfo->hasMultiSlotStruct = true; varDscInfo->setAllRegArgUsed(arg1_Type); @@ -1515,9 +1516,9 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, varDsc->lvIsImplicitByRef = 0; #elif defined(TARGET_LOONGARCH64) varDsc->lvIsImplicitByRef = 0; - varDsc->lvIs4Field1 = 0; - varDsc->lvIs4Field2 = 0; - varDsc->lvIsSplit = 0; + varDsc->lvIs4Field1 = 0; + varDsc->lvIs4Field2 = 0; + varDsc->lvIsSplit = 0; #endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Set the lvType (before this point it is TYP_UNDEF). 
@@ -1849,7 +1850,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * FP_REGSIZE_BYTES; #endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) #else // !FEATURE_SIMD - const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); + const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); #endif // !FEATURE_SIMD assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized @@ -5418,7 +5419,7 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) if (varDsc->GetStackOffset() >= delta) varDsc->SetStackOffset(varDsc->GetStackOffset() + (varDsc->lvIsSplit ? 8 : 0)); #endif @@ -5998,16 +5999,16 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #elif defined(TARGET_LOONGARCH64) - //if (compFeatureArgSplit() && this->info.compIsVarArgs) - //{//TODO: should confirm for "info.compIsVarArgs". - // if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) - // { - // // This is a split struct. It will account for an extra (8 bytes) - // // of alignment. - // varDsc->lvStkOffs += TARGET_POINTER_SIZE; - // argOffs += TARGET_POINTER_SIZE; - // } - //} +// if (compFeatureArgSplit() && this->info.compIsVarArgs) +//{//TODO: should confirm for "info.compIsVarArgs". +// if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) +// { +// // This is a split struct. It will account for an extra (8 bytes) +// // of alignment. 
+// varDsc->lvStkOffs += TARGET_POINTER_SIZE; +// argOffs += TARGET_POINTER_SIZE; +// } +//} #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6815,9 +6816,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #ifdef TARGET_LOONGARCH64 if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) - {//TODO: add VarArgs for LOONGARCH64. + { // TODO: add VarArgs for LOONGARCH64. // Stack offset to parameters should point to home area which will be preallocated. - varDsc->SetStackOffset(-initialStkOffs + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); + varDsc->SetStackOffset(-initialStkOffs + + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); continue; } #endif diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 86925234b877b..6a3fb1fb91165 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2526,7 +2526,7 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) { assert(cmp->gtGetOp2()->IsIntegralConst()); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64)// || defined(TARGET_LOONGARCH64) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) // || defined(TARGET_LOONGARCH64) ////TODO: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 3f5df4552c731..82809712de064 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -42,8 +42,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool Lowering::IsCallTargetInRange(void* addr) { ////TODO for LOONGARCH64: should amend for optimize! 
- //assert(!"unimplemented on LOONGARCH yet"); - //return comp->codeGen->validImmForBAL((ssize_t)addr); + // assert(!"unimplemented on LOONGARCH yet"); + // return comp->codeGen->validImmForBAL((ssize_t)addr); return false; } @@ -72,8 +72,8 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { case GT_ADD: - return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false - : ((-2048 <= immVal) && (immVal <= 2047)); + return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : ((-2048 <= immVal) && + (immVal <= 2047)); break; case GT_CMPXCHG: case GT_LOCKADD: @@ -127,7 +127,7 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) { assert(mul->OperIsMul()); - //if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) + // if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) //{ // GenTreeCast* op1 = mul->gtGetOp1()->AsCast(); // GenTree* op2 = mul->gtGetOp2(); @@ -256,7 +256,7 @@ void Lowering::LowerStoreIndir(GenTreeStoreInd* node) // void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { - GenTree* dstAddr = blkNode->Addr(); + GenTree* dstAddr = blkNode->Addr(); GenTree* src = blkNode->Data(); unsigned size = blkNode->Size(); @@ -286,7 +286,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ssize_t fill = src->AsIntCon()->IconValue() & 0xFF; if (fill == 0) { - src->SetContained();; + src->SetContained(); + ; } else if (size >= REGSIZE_BYTES) { @@ -347,7 +348,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; } -//////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////// else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; @@ -365,7 
+366,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } - } } @@ -432,9 +432,6 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addr->SetContained(); } - - - void Lowering::LowerCast(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -491,8 +488,8 @@ void Lowering::LowerRotate(GenTree* tree) if (rotateLeftIndexNode->IsCnsIntOrI()) { - ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; - ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; + ssize_t rotateLeftIndex = rotateLeftIndexNode->AsIntCon()->gtIconVal; + ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; rotateLeftIndexNode->AsIntCon()->gtIconVal = rotateRightIndex; } else @@ -515,7 +512,7 @@ void Lowering::LowerRotate(GenTree* tree) // void Lowering::LowerSIMD(GenTreeSIMD* simdNode) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 assert(simdNode->gtType != TYP_SIMD32); @@ -540,7 +537,7 @@ assert(!"unimplemented on LOONGARCH yet"); // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 auto intrinsicID = node->gtHWIntrinsicId; auto intrinsicInfo = HWIntrinsicInfo::lookup(node->gtHWIntrinsicId); @@ -1375,7 +1372,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) } #endif // FEATURE_SIMD - GenTree* addr = indirNode->Addr(); + GenTree* addr = indirNode->Addr(); if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirNode, addr)) { MakeSrcContained(indirNode, addr); @@ -1393,7 +1390,6 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) // make this contained, it turns into a constant that goes into an addr mode MakeSrcContained(indirNode, addr); } - } //------------------------------------------------------------------------ @@ -1473,9 +1469,6 @@ void 
Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc); - - - #ifdef FEATURE_SIMD if (varTypeIsSIMD(storeLoc)) { @@ -1552,7 +1545,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 switch (simdNode->gtSIMDIntrinsicID) { @@ -1619,7 +1612,7 @@ assert(!"unimplemented on LOONGARCH yet"); // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 GenTreeArgList* argList = nullptr; GenTree* op1 = node->gtOp.gtOp1; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b43d29ed7cfc3..c9556de7ed488 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -705,7 +705,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #elif TARGET_LOONGARCH64 availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif #if ETW_EBP_FRAMED @@ -1568,12 +1568,13 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #endif // FEATURE_SIMD case TYP_STRUCT: - // TODO-1stClassStructs: support vars with GC pointers. The issue is that such - // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, - // but if the variable is tracked the prolog generator would expect it to be in liveIn set, - // so an assert in `genFnProlog` will fire. +// TODO-1stClassStructs: support vars with GC pointers. 
The issue is that such +// vars will have `lvMustInit` set, because emitter has poor support for struct liveness, +// but if the variable is tracked the prolog generator would expect it to be in liveIn set, +// so an assert in `genFnProlog` will fire. #ifdef TARGET_LOONGARCH64 - return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); + return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && + !varDsc->HasGCPtr(); #else return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); #endif diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5e803336e2b4d..202760e1c3bcf 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2213,15 +2213,15 @@ class RefPosition // no reg is allocated. unsigned char regOptional : 1; - // Used by RefTypeDef/Use positions of a multi-reg call node. - // Indicates the position of the register that this ref position refers to. - // The max bits needed is based on max value of MAX_RET_REG_COUNT value - // across all targets and that happens 4 on on Arm. Hence index value - // would be 0..MAX_RET_REG_COUNT-1. +// Used by RefTypeDef/Use positions of a multi-reg call node. +// Indicates the position of the register that this ref position refers to. +// The max bits needed is based on max value of MAX_RET_REG_COUNT value +// across all targets and that happens 4 on on Arm. Hence index value +// would be 0..MAX_RET_REG_COUNT-1. #ifdef TARGET_LOONGARCH64 - //TODO for LOONGARCH64: should confirm for ArgSplit? + // TODO for LOONGARCH64: should confirm for ArgSplit? 
unsigned char multiRegIdx : 3; -#else // !TARGET_LOONGARCH64 +#else // !TARGET_LOONGARCH64 unsigned char multiRegIdx : 2; #endif // !TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index ba2f6f3536f24..6d056a46d737d 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -298,8 +298,8 @@ int LinearScan::BuildNode(GenTree* tree) BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; - //case GT_MOD: - //case GT_UMOD: + // case GT_MOD: + // case GT_UMOD: // NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in LOONGARCH64"); // assert(!"Shouldn't see an integer typed GT_MOD node in LOONGARCH64"); // srcCount = 0; @@ -790,7 +790,7 @@ int LinearScan::BuildNode(GenTree* tree) // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 int srcCount = 0; // Only SIMDIntrinsicInit can be contained @@ -997,7 +997,7 @@ assert(!"unimplemented on LOONGARCH yet"); // int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { -assert(!"unimplemented on LOONGARCH yet"); + assert(!"unimplemented on LOONGARCH yet"); #if 0 NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); @@ -1180,9 +1180,9 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) // int LinearScan::BuildCall(GenTreeCall* call) { - bool hasMultiRegRetVal = false; - const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP dstCandidates = RBM_NONE; + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + regMaskTP dstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -1234,7 +1234,7 @@ int LinearScan::BuildCall(GenTreeCall* call) RegisterType registerType = call->TypeGet(); -// Set destination candidates for return value of the call. + // Set destination candidates for return value of the call. 
if (hasMultiRegRetVal) { @@ -1474,8 +1474,8 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; regMaskTP argMaskArr[MAX_REG_ARG] = {RBM_NONE}; for (unsigned i = 0; i < dstCount; i++) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 872790417791a..ba813f2779050 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -214,7 +214,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)// On ARM64 All non-overflow checking conversions can be optimized +#if defined(TARGET_ARM64) || \ + defined(TARGET_LOONGARCH64) // On ARM64 All non-overflow checking conversions can be optimized return nullptr; #else switch (dstType) @@ -916,7 +917,8 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, bool isVararg, const regNumber otherRegNum) { - fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); + fgArgTabEntry* curArgTabEntry = + AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); assert(curArgTabEntry != nullptr); curArgTabEntry->isStruct = isStruct; // is this a struct arg @@ -926,7 +928,7 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, if (numRegs == 2) { curArgTabEntry->setRegNum(1, otherRegNum); - //curArgTabEntry->isSplit = true; + // curArgTabEntry->isSplit = true; } return curArgTabEntry; @@ -2888,7 +2890,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #elif defined(TARGET_X86) - passUsingFloatRegs = false; + passUsingFloatRegs = false; #elif defined(TARGET_LOONGARCH64) assert(!callIsVararg); @@ -3015,7 +3017,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* 
call) DWORD numFloatFields = 0; if (!isStructArg) { - size = 1; + size = 1; byteSize = genTypeSize(argx); } else @@ -3059,9 +3061,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else if (numFloatFields & 8) size = 2; } - else //if (passStructByRef) + else // if (passStructByRef) { - size = 1; + size = 1; byteSize = TARGET_POINTER_SIZE; } #else @@ -3225,7 +3227,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (!passUsingFloatRegs) { - size = structSize > 8 ? 2 : 1; + size = structSize > 8 ? 2 : 1; numFloatFields = 0; } else if (passUsingFloatRegs) @@ -3235,20 +3237,20 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else if (numFloatFields & 0x4) { assert(size == 1); - size = 2; + size = 2; passUsingFloatRegs = false; - nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); + nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); } - else if (/*(size == 1) && */(numFloatFields & 0x2)) + else if (/*(size == 1) && */ (numFloatFields & 0x2)) { assert((size == 1) && (numFloatFields & 0x2)); - size = 2; + size = 2; nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); } } } - assert(!isHfaArg);//LOONGARCH not support HFA. + assert(!isHfaArg); // LOONGARCH not support HFA. } // if run out the fp argument register, try the int argument register. @@ -3268,9 +3270,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // We also must update intArgRegNum so that we no longer try to // allocate any new general purpose registers for args // - isRegArg = intArgRegNum < maxRegArgs;//the split-struct case. + isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. 
nextOtherRegNum = REG_STK; - //assert((intArgRegNum + 1) == maxRegArgs); + // assert((intArgRegNum + 1) == maxRegArgs); } } #else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 @@ -3425,10 +3427,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, byteSize, argAlignBytes, isStructArg, - isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) - UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) - UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) - UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); + isFloatHfa, callIsVararg UNIX_LOONGARCH64_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) + UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); // Set up the next intArgRegNum and fltArgRegNum values. @@ -3448,7 +3451,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif // FEATURE_ARG_SPLIT assert(!passUsingFloatRegs); assert(size == 2); - //assert(nextOtherRegNum == REG_STK); + // assert(nextOtherRegNum == REG_STK); intArgRegNum = maxRegArgs; } else if ((numFloatFields & 0xf) == 0x0) @@ -3465,7 +3468,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } else if (numFloatFields & 0x6) { - //assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); + // assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); fltArgRegNum += 1; intArgRegNum += 1; } @@ -3558,9 +3561,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { newArgEntry->passedByRef = passStructByRef; #if defined(TARGET_LOONGARCH64) - newArgEntry->argType = (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + newArgEntry->argType = + (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? 
argx->TypeGet() : structBaseType; #else - newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; #endif } else @@ -4474,8 +4478,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } #if FEATURE_MULTIREG_ARGS - // Examine 'arg' and setup argValue objClass and structSize - // +// Examine 'arg' and setup argValue objClass and structSize +// #if defined(TARGET_LOONGARCH64) const CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); if (objClass == NO_CLASS_HANDLE) @@ -4490,24 +4494,24 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry LclVarDsc* varDsc = &lvaTable[varNum]; assert(varDsc->lvExactSize == 8); - unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); - GenTreeFieldList* newArg = nullptr; - var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? TYP_FLOAT : TYP_INT; - arg->gtType = tmp_type; + unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); + GenTreeFieldList* newArg = nullptr; + var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? TYP_FLOAT : TYP_INT; + arg->gtType = tmp_type; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); newArg->AddField(this, arg, offset, tmp_type); - tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? TYP_FLOAT : TYP_INT; + tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? 
TYP_FLOAT : TYP_INT; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type, offset + 4); newArg->AddField(this, nextLclFld, offset + 4, tmp_type); return newArg; } #else - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); #endif - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; if (arg->TypeGet() != TYP_STRUCT) { @@ -4929,12 +4933,12 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry else if (numFloatFields & 0x2) { tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] + // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; } else if (numFloatFields & 0x4) { - //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + // tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; } @@ -4943,7 +4947,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4;; + elemSize = numFloatFields & 0x30 ? 
8 : 4; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); newArg->AddField(this, nextLclFld, offset, tmp_type_1); @@ -4959,7 +4963,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry if (elemCount > 1) { assert(elemCount == 2); - elemSize = genTypeSize(type[1]); + elemSize = genTypeSize(type[1]); nextLclFld = gtNewLclFldNode(varNum, type[1], offset + elemSize); newArg->AddField(this, nextLclFld, offset + elemSize, type[1]); } @@ -5018,12 +5022,12 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry else if (numFloatFields & 0x2) { tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - //tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; + // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; } else if (numFloatFields & 0x4) { - //tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; + // tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; } @@ -5032,7 +5036,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4;; + elemSize = numFloatFields & 0x30 ? 8 : 4; GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. 
@@ -5041,7 +5045,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); newArg->AddField(this, curItem, 0, tmp_type_1); - //GenTree* curAddr = baseAddr; + // GenTree* curAddr = baseAddr; baseAddr = gtCloneExpr(baseAddr); noway_assert(baseAddr != nullptr); baseAddr = gtNewOperNode(GT_ADD, addrType, baseAddr, gtNewIconNode(elemSize, TYP_I_IMPL)); @@ -5054,26 +5058,28 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #endif - for (unsigned inx = 0; inx < elemCount; inx++) { - GenTree* curAddr = baseAddr; - if (offset != 0) - { - GenTree* baseAddrDup = gtCloneExpr(baseAddr); - noway_assert(baseAddrDup != nullptr); - curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); - } - else + for (unsigned inx = 0; inx < elemCount; inx++) { - curAddr = baseAddr; - } - GenTree* curItem = gtNewIndir(type[inx], curAddr); + GenTree* curAddr = baseAddr; + if (offset != 0) + { + GenTree* baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); + } + else + { + curAddr = baseAddr; + } + GenTree* curItem = gtNewIndir(type[inx], curAddr); - // For safety all GT_IND should have at least GT_GLOB_REF set. - curItem->gtFlags |= GTF_GLOB_REF; + // For safety all GT_IND should have at least GT_GLOB_REF set. 
+ curItem->gtFlags |= GTF_GLOB_REF; - newArg->AddField(this, curItem, offset, type[inx]); - offset += genTypeSize(type[inx]); + newArg->AddField(this, curItem, offset, type[inx]); + offset += genTypeSize(type[inx]); + } } } } @@ -5777,7 +5783,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } #else - GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); #endif GenTreeBoundsChk* arrBndsChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL); @@ -18031,9 +18037,10 @@ GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) // void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) { - // We expect 'addr' to be an address at this point. +// We expect 'addr' to be an address at this point. #ifdef TARGET_LOONGARCH64 - assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || addr->TypeGet() == TYP_REF); + assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || + addr->TypeGet() == TYP_REF); #else assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); #endif diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index 16c3f051f74bf..755dd28915684 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -44,11 +44,11 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) nextReg(TYP_FLOAT, numRegs); #elif defined(TARGET_LOONGARCH64) // LA-ABI64. - if (numRegs > MAX_PASS_MULTIREG_BYTES/TARGET_POINTER_SIZE) + if (numRegs > MAX_PASS_MULTIREG_BYTES / TARGET_POINTER_SIZE) { assert(varTypeIsStruct(type)); - nextReg(TYP_INT, 1);//TYP_BYREF - }//TODO:struct-float. 
+ nextReg(TYP_INT, 1); // TYP_BYREF + } // TODO:struct-float. else { nextReg(type, numRegs); diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index a200b5686a7df..f2bc7875152b6 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -127,7 +127,7 @@ class RegSet regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog #elif defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARM +#endif // TARGET_ARM public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 03ddbdfd7585c..2e315d2faa84b 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -1609,7 +1609,7 @@ void CodeGen::psiBegProlog() noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); if (emitter::isFloatReg(lclVarDsc->GetArgReg())) { - //regType = lclVarDsc->lvIs4Field1 ? TYP_FLOAT : TYP_DOUBLE; + // regType = lclVarDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; regType = TYP_DOUBLE; } else diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index e78b74616ce41..97d9a69328483 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -34,7 +34,8 @@ inline bool compMacOsArm64Abi() } inline bool compFeatureArgSplit() { - return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || (TargetOS::IsWindows && TargetArchitecture::IsArm64); + return TargetArchitecture::IsLoongArch64 || TargetArchitecture::IsArm32 || + (TargetOS::IsWindows && TargetArchitecture::IsArm64); } inline bool compUnixX86Abi() { diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp index 92f076eba3388..08c2ed857231a 100644 --- a/src/coreclr/jit/targetloongarch64.cpp +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -15,8 +15,8 @@ #include "target.h" -const char* Target::g_tgtCPUName = "loongarch64"; -const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +const char* Target::g_tgtCPUName = "loongarch64"; +const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; const Target::ArgOrder Target::g_tgtUnmanagedArgOrder = ARG_ORDER_R2L; // clang-format off diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index ffb7cf71d886b..6ad60a064f35c 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -412,7 +412,8 @@ UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func) else { if (TargetArchitecture::IsX64 || - (TargetOS::IsUnix && (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) + (TargetOS::IsUnix && + (TargetArchitecture::IsArmArch || TargetArchitecture::IsX86 || TargetArchitecture::IsLoongArch64))) { assert(func->startLoc != nullptr); offset = func->startLoc->GetFuncletPrologOffset(GetEmitter()); diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp index eae92c102e381..00ffa5482185d 100644 --- 
a/src/coreclr/jit/unwindloongarch64.cpp +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -360,7 +360,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) } #endif // TARGET_UNIX int z = offset / 8; - //assert(0 <= z && z <= 0xFF); + // assert(0 <= z && z <= 0xFF); UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -368,8 +368,8 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) { // save_reg: 11010000 | 000xxxxx | zzzzzzzz: save reg r(1 + #X) at [sp + #Z * 8], offset <= 2047 - assert(reg == REG_RA || reg == REG_FP || // first legal register: RA - (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 + assert(reg == REG_RA || reg == REG_FP || // first legal register: RA + (REG_S0 <= reg && reg <= REG_S8)); // last legal register: S8 BYTE x = (BYTE)(reg - REG_RA); assert(0 <= x && x <= 0x1E); @@ -381,7 +381,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) // save_freg: 11011100 | 0xxxzzzz | zzzzzzzz : save reg f(24 + #X) at [sp + #Z * 8], offset <= 2047 assert(REG_F24 <= reg && // first legal register: F24 - reg <= REG_F31); // last legal register: F31 + reg <= REG_F31); // last legal register: F31 BYTE x = (BYTE)(reg - REG_F24); assert(0 <= x && x <= 0x7); @@ -392,7 +392,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) { - //TODO:temp not used on loongarch64. + // TODO:temp not used on loongarch64. 
assert(!"unimplemented on LOONGARCH yet"); #if 0 UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -473,22 +473,22 @@ unsigned GetUnwindSizeFromUnwindHeader(BYTE b1) { static BYTE s_UnwindSize[256] = { // array of unwind sizes, in bytes (as specified in the LOONGARCH unwind specification) - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF - 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF - 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF - 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF + 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 2, 2, 2, // C0-CF + 3, 
2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 2, 2, 1, // D0-DF + 4, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF }; unsigned size = s_UnwindSize[b1]; @@ -891,8 +891,8 @@ void DumpUnwindInfo(Compiler* comp, x = ((DWORD)(b2 & 0x1F) << 8) | (DWORD)b3; - printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, getRegName(REG_FP), - x * 8); + printf(" %02X %02X %02X add_fp %u (0x%02X); addi.d %s, sp, #%u\n", b1, b2, b3, x, x, + getRegName(REG_FP), x * 8); } else if (b1 == 0xE3) { @@ -1227,12 +1227,12 @@ int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi) return -1; } - int matchIndex = 0;//Size() - pEpi->Size(); + int matchIndex = 0; // Size() - pEpi->Size(); BYTE* pProlog = GetCodes(); BYTE* pEpilog = pEpi->GetCodes(); - //First check set_fp. + // First check set_fp. if (0 < pEpi->Size()) { if (*pProlog == 0xE1) @@ -1783,7 +1783,7 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) } #endif -// Compute the header + // Compute the header noway_assert((functionLength & 3) == 0); DWORD headerFunctionLength = functionLength / 4; @@ -1791,7 +1791,7 @@ void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength) DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined. DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some. DWORD headerEBit; - DWORD headerEpilogCount; // This depends on how we set headerEBit. + DWORD headerEpilogCount; // This depends on how we set headerEBit. DWORD headerCodeWords; DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit. DWORD headerExtendedCodeWords = 0; @@ -2107,7 +2107,8 @@ void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi) // expand!) during issuing (although this is extremely rare in any case, and may not // actually occur on LOONGARCH), so we don't finalize actual sizes or offsets. 
// -// LOONGARCH64 has very similar limitations, except functions can be up to 1MB. TODO-LOONGARCH64-Bug?: make sure this works! +// LOONGARCH64 has very similar limitations, except functions can be up to 1MB. TODO-LOONGARCH64-Bug?: make sure this +// works! // // We don't split any prolog or epilog. Ideally, we might not split an instruction, // although that doesn't matter because the unwind at any point would still be @@ -2200,7 +2201,7 @@ void UnwindInfo::Split() // Call the emitter to do the split, and call us back for every split point it chooses. uwiComp->GetEmitter()->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this, - EmitSplitCallback); + EmitSplitCallback); #ifdef DEBUG // Did the emitter split the function/funclet into as many fragments as we asked for? diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index dfad0b8d9e4dd..078de7524a3a4 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -337,10 +337,10 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) || (regNum == REG_R28)) // last register before FP #elif defined(TARGET_LOONGARCH64) else if ((regNum == REG_INT_LAST) || (regNum == REG_X0)) -#else // TARGET_LOONGARCH64 +#else // TARGET_LOONGARCH64 // We've already printed a register. Is this the end of a range? 
else if (regNum == REG_INT_LAST) -#endif // TARGET_LOONGARCH64 +#endif // TARGET_LOONGARCH64 { const char* nam = getRegName(regNum); printf("%s%s", sep, nam); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index a267d32fbc44e..bd03c4cbaad02 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -58,7 +58,7 @@ struct FloatTraits #elif defined(TARGET_ARMARCH) unsigned bits = 0x7FC00000u; #elif defined(TARGET_LOONGARCH64) - unsigned bits = 0xFFC00000u; + unsigned bits = 0xFFC00000u; #else #error Unsupported or unset target architecture #endif From 1e92895507798284668600bb3f0dbf2ce4a574e1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 11 Jan 2022 19:22:36 +0800 Subject: [PATCH 10/46] [LoongArch64] update by `git apply format.patch`. --- src/coreclr/jit/codegenloongarch64.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index be858336db73d..e3400b54667a7 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1067,8 +1067,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, - genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, @@ -1231,8 +1230,7 @@ void CodeGen::genFuncletEpilog() genFuncletInfo.fiSP_to_FPRA_save_delta + 8); compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, - 
genFuncletInfo.fiSP_to_FPRA_save_delta); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta); compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta); // generate daddiu SP,SP,imm From 348a7f62a6209b046c0fa716a949a8260dadae8d Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 14 Jan 2022 12:32:41 +0800 Subject: [PATCH 11/46] [LoongArch64] Delete the interface getArgType2. And refactor the returned values of `getFieldSizeClassificationByHnd`. Keep aligned with #62893 `02ff4bfa41d7887b151d381e2697ba82ab6a0bca`. --- src/coreclr/jit/compiler.cpp | 6 ++-- src/coreclr/jit/gentree.cpp | 26 +++++++-------- src/coreclr/jit/lclvars.cpp | 36 ++++++++++---------- src/coreclr/jit/morph.cpp | 64 +++++++++++++++++------------------- 4 files changed, 65 insertions(+), 67 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9ee7ae6ffd183..e06d37bde9912 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -935,14 +935,14 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_LOONGARCH64 if (structSize <= (TARGET_POINTER_SIZE * 2)) { - DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(clsHnd); + uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(clsHnd); - if (numFloatFields & 0x1) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_ONE) { howToReturnStruct = SPK_PrimitiveType; useType = structSize > 4 ? 
TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & 0xE) + else if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { howToReturnStruct = SPK_ByValue; useType = TYP_STRUCT; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index b01ab98be43b3..bb6c3f5bb08fd 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21696,29 +21696,29 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, #elif defined(TARGET_LOONGARCH64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); - DWORD numFloatFields = comp->info.compCompHnd->getFieldTypeByHnd(retClsHnd); + uint32_t numFloatFields = comp->info.compCompHnd->getFieldSizeClassificationByHnd(retClsHnd); BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); - if (numFloatFields & 0x8) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) { - assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } - else if (numFloatFields & 0x2) + else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) { - assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & 0x20 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; } - else if (numFloatFields & 0x4) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - assert((structSize > 8) == ((numFloatFields & 0x30) > 0)); - m_regType[0] = numFloatFields & 0x10 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index ce652f10e91fe..1e7906375bc9a 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -643,13 +643,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un LclVarDsc* varDsc = varDscInfo->varDsc; CORINFO_CLASS_HANDLE typeHnd = nullptr; -#if defined(TARGET_LOONGARCH64) - int flags = 0; - CorInfoTypeWithMod corInfoType = - info.compCompHnd->getArgType2(&info.compMethodInfo->args, argLst, &typeHnd, &flags); -#else CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); -#endif varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -675,6 +669,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un bool isHfaArg = false; var_types hfaType = TYP_UNDEF; +#if defined(TARGET_LOONGARCH64) + uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; + if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + { + floatFlags = info.compCompHnd->getFieldSizeClassificationByHnd(typeHnd); + } +#endif + // Methods that use VarArg or SoftFP cannot have HFA arguments except // Native 
varargs on arm64 unix use the regular calling convention. if (((TargetOS::IsUnix && TargetArchitecture::IsArm64) || !info.compIsVarArgs) && !opts.compUseSoftFP) @@ -895,11 +897,11 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #elif defined(TARGET_LOONGARCH64) var_types arg1_Type = TYP_UNKNOWN; var_types arg2_Type = TYP_UNKNOWN; - if (flags & 0xf) + if (floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) { assert(varTypeIsStruct(argType)); int float_num = 0; - if (flags == 1) + if (floatFlags == STRUCT_FLOAT_FIELD_ONLY_ONE) { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); @@ -908,30 +910,30 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); } - else if (flags & 0x8) + else if (floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; + arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; float_num = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); } - else if (flags & 2) + else if (floatFlags & STRUCT_FLOAT_FIELD_FIRST) { float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (flags & 0x10) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (flags & 0x20) ? TYP_LONG : TYP_INT; + arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_LONG : TYP_INT; } - else if (flags & 4) + else if (floatFlags & STRUCT_FLOAT_FIELD_SECOND) { float_num = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (flags & 0x10) ? TYP_LONG : TYP_INT; - arg2_Type = (flags & 0x20) ? TYP_DOUBLE : TYP_FLOAT; + arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } if (!canPassArgInRegisters) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index f0e1647414878..2bed706b954ce 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3051,14 +3051,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); - numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); + numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); - passUsingFloatRegs = (numFloatFields & 0xf) ? true : false; + passUsingFloatRegs = (numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) ? 
true : false; compFloatingPointUsed |= passUsingFloatRegs; - if (numFloatFields & 7) + if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) size = 1; - else if (numFloatFields & 8) + else if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) size = 2; } else // if (passStructByRef) @@ -4917,37 +4917,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); #if defined(TARGET_LOONGARCH64) - DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); - if ((numFloatFields & 0xe) /*&& (varDsc->lvSize() == TARGET_POINTER_SIZE)*/) + uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & 0xf) > 1); + assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & 0x8) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & 0x2) + else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT;type[0] - tmp_type_2 = numFloatFields & 0x20 ? type[1] : TYP_INT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; } - else if (numFloatFields & 0x4) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - // tmp_type_1 = numFloatFields & 0x10 ? 
TYP_LONG: TYP_INT; - tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4; + elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); newArg->AddField(this, nextLclFld, offset, tmp_type_1); @@ -5006,37 +5004,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; #if defined(TARGET_LOONGARCH64) - DWORD numFloatFields = info.compCompHnd->getFieldTypeByHnd(objClass); - if (numFloatFields & 0xe) + uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & 0xf) > 1); + assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & 0x8) + if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & 0x2) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - tmp_type_1 = numFloatFields & 0x10 ? TYP_DOUBLE : TYP_FLOAT; - // tmp_type_2 = numFloatFields & 0x20 ? TYP_LONG: TYP_INT; - tmp_type_2 = numFloatFields & 0x20 ? 
type[1] : TYP_INT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; } - else if (numFloatFields & 0x4) + else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { - // tmp_type_1 = numFloatFields & 0x10 ? TYP_LONG: TYP_INT; - tmp_type_1 = numFloatFields & 0x10 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & 0x20 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; + tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & 0x30 ? 8 : 4; + elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. From a39969559566cd3382b07f1986714b1d40accbfd Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 14 Jan 2022 13:27:05 +0800 Subject: [PATCH 12/46] [LoongArch64] update code by `git apply format.patch` And update `getLoongArch64PassStructInRegisterFlags`. 
--- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/gentree.cpp | 12 ++++++------ src/coreclr/jit/lclvars.cpp | 6 +++--- src/coreclr/jit/morph.cpp | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e06d37bde9912..75e7053b94825 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -935,7 +935,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_LOONGARCH64 if (structSize <= (TARGET_POINTER_SIZE * 2)) { - uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(clsHnd); + uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_ONE) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index bb6c3f5bb08fd..44c25eaa8f80f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21696,8 +21696,8 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, #elif defined(TARGET_LOONGARCH64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); - uint32_t numFloatFields = comp->info.compCompHnd->getFieldSizeClassificationByHnd(retClsHnd); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + uint32_t numFloatFields = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) @@ -21710,15 +21710,15 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) { assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
comp->getJitGCType(gcPtrs[1]) : TYP_INT; + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; } else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) { assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; } else diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 1e7906375bc9a..9b0607a104609 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -610,7 +610,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #elif defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) // On System V type environment the float registers are not indexed together with the int ones. 
- varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; + varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum; #endif // TARGET* CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args; @@ -644,7 +644,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un CORINFO_CLASS_HANDLE typeHnd = nullptr; CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd); - varDsc->lvIsParam = 1; + varDsc->lvIsParam = 1; lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args); @@ -673,7 +673,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) { - floatFlags = info.compCompHnd->getFieldSizeClassificationByHnd(typeHnd); + floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); } #endif diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2bed706b954ce..5a24f80721d30 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3051,7 +3051,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); - numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); passUsingFloatRegs = (numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) ? 
true : false; compFloatingPointUsed |= passUsingFloatRegs; @@ -4917,7 +4917,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); @@ -5004,7 +5004,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getFieldSizeClassificationByHnd(objClass); + uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); From b2b53d270ee9516e9eea0531b0a8b5bafe019e65 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 14 Jan 2022 15:10:01 +0800 Subject: [PATCH 13/46] [LoongArch64] Fixed the error when passing float-arg by integer-reg. 
--- src/coreclr/jit/codegenlinear.cpp | 7 +++++++ src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/importer.cpp | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 92043be1edc67..e8ae2155ed1ca 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1230,6 +1230,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) assert(!varTypeIsGC(varDsc)); spillType = lclActualType; } + +#if defined(TARGET_LOONGARCH64) + if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum())) + { + spillType = spillType == TYP_FLOAT ? TYP_INT : TYP_LONG; + } +#endif #elif defined(TARGET_ARM) // No normalizing for ARM #else diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e3400b54667a7..6291b360bbfb7 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4563,7 +4563,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) { assert(reg != REG_R21); ssize_t imm = op2->AsIntCon()->gtIconVal; - if ((tree->gtFlags & GTF_UNSIGNED) && (attr == EA_4BYTE)) + if (attr == EA_4BYTE) { assert(reg != REG_RA); imm = (int32_t)imm; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index dd89a0a063de4..08bc8cac5598d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -14227,6 +14227,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (!callNode && prevOpcode == CEE_LDC_I4_0) { assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); + if (varTypeIsFloating(lclTyp)) + op1->gtOper = GT_CNS_DBL; op1->gtType = genActualType(lclTyp); impPushOnStack(op1, tiRetVal); // opcode = CEE_LDC_I4_0; From b5b60cb11e039dcd4d3b35e1e8d1377ce0a863f3 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 18 Jan 2022 15:21:52 +0800 Subject: [PATCH 14/46] [Loongarch64] amend patch formate by 'git 
apply format.patch' --- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 08bc8cac5598d..48a2b23068260 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -14229,7 +14229,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); if (varTypeIsFloating(lclTyp)) op1->gtOper = GT_CNS_DBL; - op1->gtType = genActualType(lclTyp); + op1->gtType = genActualType(lclTyp); impPushOnStack(op1, tiRetVal); // opcode = CEE_LDC_I4_0; break; From 8ef00ba1535d0f71f52b84b9b5fe85788e93985c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 19 Jan 2022 09:42:25 +0800 Subject: [PATCH 15/46] [LoongArch64] update the version of the `LICENSE description`. --- src/coreclr/jit/codegenloongarch64.cpp | 3 --- src/coreclr/jit/emitfmtsloongarch64.h | 3 --- src/coreclr/jit/emitloongarch64.cpp | 3 --- src/coreclr/jit/emitloongarch64.h | 3 --- src/coreclr/jit/instrsloongarch64.h | 3 --- src/coreclr/jit/lowerloongarch64.cpp | 3 --- src/coreclr/jit/lsraloongarch64.cpp | 3 --- src/coreclr/jit/targetloongarch64.cpp | 3 --- src/coreclr/jit/unwindloongarch64.cpp | 2 -- 9 files changed, 26 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 6291b360bbfb7..796c40b465af0 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. 
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h index b4232269b144f..e04d60270d567 100644 --- a/src/coreclr/jit/emitfmtsloongarch64.h +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. ////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index e58ccb61282bb..c22d729f62a60 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information.//emitarm64.cpp deletes this line. - -// Copyright (c) Loongson Technology. All rights reserved. /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index e9cc1e9d831d7..85841251de82a 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. 
#if defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index f171bc69a7a68..3e54bce650d88 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. /***************************************************************************** * Loongarch64 instructions for JIT compiler diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 82809712de064..692c9d0fe408e 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 6d056a46d737d..801b863758edd 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. 
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/targetloongarch64.cpp b/src/coreclr/jit/targetloongarch64.cpp index 08c2ed857231a..e0097a1b62a1c 100644 --- a/src/coreclr/jit/targetloongarch64.cpp +++ b/src/coreclr/jit/targetloongarch64.cpp @@ -1,8 +1,5 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -// Copyright (c) Loongson Technology. All rights reserved. /*****************************************************************************/ diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp index 00ffa5482185d..110c88985a510 100644 --- a/src/coreclr/jit/unwindloongarch64.cpp +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -1,8 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// Copyright (c) Loongson Technology. All rights reserved. - /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX From cadce2c59336cfe48d36985f55f68e20f52aff69 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 10 Feb 2022 17:09:05 +0800 Subject: [PATCH 16/46] [LoongArch64] amend the CodeGen::genFnPrologCalleeRegArgs for the SC_IG_BUFFER_SIZE. 
--- src/coreclr/jit/codegencommon.cpp | 63 ++++++++++++++++++------------- src/coreclr/jit/emit.h | 4 -- src/coreclr/jit/instr.cpp | 15 ++++---- 3 files changed, 45 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 3a61fc4cbaed5..dc91f7c690795 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3448,21 +3448,20 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - if (tmp_reg == REG_NA) + assert(tmp_reg == REG_NA); + + tmp_offset = base; + tmp_reg = REG_R21; + if ((0 < base) && (base <= 0xfff)) { - regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); } else { - baseOffset = -(base - tmp_offset) - 8; - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } regArgMaskLive &= ~genRegMask(srcRegNum); @@ -3503,18 +3502,24 @@ void CodeGen::genFnPrologCalleeRegArgs() { if (tmp_reg == REG_NA) { - regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, -8); + tmp_offset = base; + tmp_reg = REG_R21; + if ((0 < base) && (base <= 0xfff)) + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); + } + else + { + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + } + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } else { baseOffset = -(base - tmp_offset) - 8; - GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, baseOffset); } } regArgMaskLive &= ~genRegMask(srcRegNum); // maybe do this later is better! @@ -3535,18 +3540,24 @@ void CodeGen::genFnPrologCalleeRegArgs() { if (tmp_reg == REG_NA) { - regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; - tmp_offset = base; - tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); - GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, -8); + tmp_offset = base; + tmp_reg = REG_R21; + if ((0 < base) && (base <= 0xfff)) + { + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); + } + else + { + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + } + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); } else { baseOffset = -(base - tmp_offset) - 8; - GetEmitter()->emitIns_S_R(INS_st_d, size, REG_ARG_LAST, varNum, baseOffset); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R21, 8); + GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, baseOffset); } } } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 15b84ae2b4cec..891ad9766af07 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1944,11 +1944,7 @@ class emitter #elif defined(TARGET_LOONGARCH64) -#ifdef DEBUG -#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#else #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 20 * SMALL_IDSC_SIZE) -#endif #else // !TARGET_LOONGARCH64 #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index e80855be507dc..57089672ac279 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1917,6 +1917,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) * Parameters * dstType - destination type * aligned - whether destination is properly aligned if 
dstType is a SIMD type + * - for LoongArch64 aligned is used for store-index. */ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/) { @@ -1977,11 +1978,11 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false { if (dstType == TYP_DOUBLE) { - return INS_fst_d; + return aligned ? INS_fstx_d : INS_fst_d; } else if (dstType == TYP_FLOAT) { - return INS_fst_s; + return aligned ? INS_fstx_s : INS_fst_s; } } #else @@ -2000,13 +2001,13 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = INS_strh; #elif defined(TARGET_LOONGARCH64) if (varTypeIsByte(dstType)) - ins = INS_st_b; + ins = aligned ? INS_stx_b : INS_st_b; else if (varTypeIsShort(dstType)) - ins = INS_st_h; + ins = aligned ? INS_stx_h : INS_st_h; else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) - ins = INS_st_w; - else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) - ins = INS_st_d; // default st_d. + ins = aligned ? INS_stx_w : INS_st_w; + else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + ins = aligned ? INS_stx_d : INS_st_d; #else NYI("ins_Store"); #endif From 3c79267ec284381a3acd053fda7499944ac7598a Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Feb 2022 11:37:10 +0800 Subject: [PATCH 17/46] [LoongArch64]: update the crossgen2 within the JIT. 
--- src/coreclr/jit/emitloongarch64.cpp | 39 +++++++++++++---------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index c22d729f62a60..4c90765f7609f 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2225,7 +2225,7 @@ void emitter::emitIns_R_AI(instruction ins, // addi_d reg, reg, off-lo-12bits // case:EA_PTR_DSP_RELOC // pcaddu12i reg, off-hi-20bits - // ldptr_d reg, reg, off-lo-12bits + // ld_d reg, reg, off-lo-12bits instrDesc* id = emitNewInstr(attr); @@ -2853,15 +2853,14 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t // pcaddu18i t2, addr-hi20 // jilr r0/1,t2,addr-lo18 + *(code_t*)dst = 0x1e00000e; + long addr = (long)id->idAddr()->iiaAddr; // get addr. // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; addr = addr ^ 1; - emitRecordRelocation(dst, (BYTE*)addr, IMAGE_REL_LOONGARCH64_PC); - - *(code_t*)dst = 0x1e00000e; dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); @@ -2871,6 +2870,8 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t assert(code == 0x4c000000); #endif *(code_t*)dst = 0x4c000000 | (14 << 5) | reg2; + + emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_LOONGARCH64_JIR); } else { @@ -2907,10 +2908,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t D_INST_JIRL(code, reg2, REG_T2, 0); } - // Now output the call instruction and update the 'dst' pointer - // - unsigned outputInstrSize = emitOutput_Instr(dst, code); - dst += outputInstrSize; + dst += 4; // update volatile regs within emitThisGCrefRegs and emitThisByrefRegs. 
if (gcrefRegs != emitThisGCrefRegs) @@ -2922,10 +2920,6 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); } - // All call instructions are 4-byte in size on LOONGARCH64 - // not including delay-slot which processed later. - assert(outputInstrSize == callInstrSize); - // If the method returns a GC ref, mark INTRET (A0) appropriately. if (id->idGCref() == GCT_GCREF) { @@ -3041,23 +3035,22 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // addi_d reg, reg, off-lo-12bits // case:EA_PTR_DSP_RELOC // pcaddu12i reg, off-hi-20bits - // ldptr_d reg, reg, off-lo-12bits + // ld_d reg, reg, off-lo-12bits regNumber reg1 = id->idReg1(); - emitRecordRelocation(dst, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); - *(code_t*)dst = 0x1c000000 | (code_t)reg1; - dst += 4; + dst2 = dst; + dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu12i); assert(code == 0x1c000000); code = emitInsCode(INS_addi_d); assert(code == 0x02c00000); - code = emitInsCode(INS_ldptr_d); - assert(code == 0x26000000); + code = emitInsCode(INS_ld_d); + assert(code == 0x28c00000); #endif if (id->idIsCnsReloc()) @@ -3065,11 +3058,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ins = INS_addi_d; *(code_t*)dst = 0x02c00000 | (code_t)reg1 | (code_t)(reg1 << 5); } - else // if (id->idIsDspReloc()) + else { assert(id->idIsDspReloc()); - ins = INS_ldptr_d; - *(code_t*)dst = 0x26000000 | (code_t)reg1 | (code_t)(reg1 << 5); + ins = INS_ld_d; + *(code_t*)dst = 0x28c00000 | (code_t)reg1 | (code_t)(reg1 << 5); } if (id->idGCref() != GCT_NONE) @@ -3083,6 +3076,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; + emitRecordRelocation(dst2, id->idAddr()->iiaAddr, IMAGE_REL_LOONGARCH64_PC); + + dst2 += 4; + sz = sizeof(instrDesc); } break; From 7192df162f496186e017974513141baab8bb3b2b Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Feb 
2022 15:02:54 +0800 Subject: [PATCH 18/46] [LoongArch64] git-apply the `format.patch`. --- src/coreclr/jit/codegencommon.cpp | 6 +++--- src/coreclr/jit/importer.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2e8907c4c313e..062bbd0eb1678 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3466,7 +3466,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); @@ -3518,7 +3518,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); @@ -3556,7 +3556,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset>>12); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); } GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0b8c25447387b..d6e424baf0639 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -17948,7 +17948,7 @@ void Compiler::impImportBlock(BasicBlock* block) // Spill clique has decided this should be "byref", but this block only pushes an "int". 
// Insert a sign-extension to "native int" so we match the clique size. #ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT) + if (tree->gtOper == GT_CNS_INT) { tree->gtType = TYP_I_IMPL; tree->SetContained(); From 619c8e89b9ad5b8be62fcabe172b9a7e8f467aba Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Feb 2022 20:21:23 +0800 Subject: [PATCH 19/46] [LoongArch64] Fix the compiling error after merge-main. --- src/coreclr/jit/lowerloongarch64.cpp | 40 ++++++++++++++++++++++++++++ src/coreclr/jit/lsraloongarch64.cpp | 3 +-- src/coreclr/jit/targetloongarch64.h | 4 +++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 692c9d0fe408e..7468766b7a052 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -160,6 +160,46 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } +//------------------------------------------------------------------------ +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. 
+// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) +{ + if (comp->opts.OptimizationEnabled() && binOp->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (binOp->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp1(); + opNode = binOp->gtGetOp2(); + } + else if (binOp->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = binOp->gtGetOp2(); + opNode = binOp->gtGetOp1(); + } + + if (notNode != nullptr) + { + binOp->gtOp1 = opNode; + binOp->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + binOp->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } + + ContainCheckBinary(binOp); + + return binOp->gtNext; +} + //------------------------------------------------------------------------ // LowerStoreLoc: Lower a store of a lclVar // diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 801b863758edd..66745063b96dd 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -526,7 +526,6 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_BLK: - case GT_DYN_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); srcCount = 0; @@ -1681,7 +1680,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIs(GT_STORE_DYN_BLK)) { useCount++; - BuildUse(blkNode->AsDynBlk()->gtDynamicSize, sizeRegMask); + BuildUse(blkNode->AsStoreDynBlk()->gtDynamicSize, sizeRegMask); } buildInternalRegisterUses(); diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index cf97f4148cf16..2c6153f6579fd 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -262,6 +262,10 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
#define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH + #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_A0 | RBM_A1 | RBM_A2 | RBM_A3 | RBM_A4 | RBM_A5 | RBM_A6 | RBM_A7 | RBM_T3)) + #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_T3 + #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_T0 + #define REG_FPBASE REG_FP #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" From 789c16fb7ab91b574296de2c1cb04cae4717a46e Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 16 Feb 2022 17:12:58 +0800 Subject: [PATCH 20/46] [LoongArch64] amend the code for reviewing by @BruceForstall. --- src/coreclr/jit/CMakeLists.txt | 4 - src/coreclr/jit/codegen.h | 67 +----------- src/coreclr/jit/codegencommon.cpp | 16 +-- src/coreclr/jit/codegeninterface.h | 10 +- src/coreclr/jit/codegenloongarch64.cpp | 19 ---- src/coreclr/jit/compiler.cpp | 6 -- src/coreclr/jit/compiler.h | 23 ++--- src/coreclr/jit/emitjmps.h | 16 +-- src/coreclr/jit/emitloongarch64.cpp | 2 +- src/coreclr/jit/gentree.h | 7 +- src/coreclr/jit/instr.cpp | 7 -- src/coreclr/jit/jit.h | 2 +- src/coreclr/jit/lclvars.cpp | 1 - src/coreclr/jit/lowerloongarch64.cpp | 4 +- src/coreclr/jit/lsra.h | 6 ++ src/coreclr/jit/lsrabuild.cpp | 44 ++++++++ src/coreclr/jit/regalloc.cpp | 13 --- src/coreclr/jit/registerloongarch64.h | 135 ++++++++++++------------- src/coreclr/jit/regset.cpp | 8 +- src/coreclr/jit/regset.h | 6 +- src/coreclr/jit/target.h | 6 +- src/coreclr/jit/targetloongarch64.h | 28 ++--- src/coreclr/jit/unwindloongarch64.cpp | 90 ++++------------- src/coreclr/jit/utils.cpp | 19 +++- src/coreclr/jit/valuenum.cpp | 15 ++- src/coreclr/jit/valuenumfuncs.h | 2 +- 26 files changed, 194 insertions(+), 362 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 9a073ab2bfb8a..caf0726d970b3 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -242,8 +242,6 @@ set( JIT_LOONGARCH64_SOURCES lsraloongarch64.cpp targetloongarch64.cpp 
unwindloongarch64.cpp - ##hwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. - ##simdashwintrinsiclistloongarch64.cpp ###TODO:Not implemented on loongarch64 yet. ) # We include the headers here for better experience in IDEs. @@ -397,8 +395,6 @@ set( JIT_LOONGARCH64_HEADERS emitfmtsloongarch64.h instrsloongarch64.h registerloongarch64.h -#hwintrinsiclistloongarch64.h ###TODO:Not implemented on loongarch64 yet. -#simdashwintrinsiclistloongarch64.h ) convert_to_absolute_path(JIT_SOURCES ${JIT_SOURCES}) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b44ed34f09857..60cbb3714dc74 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -278,7 +278,7 @@ class CodeGen final : public CodeGenInterface void genClearStackVec3ArgUpperBits(); #endif // UNIX_AMD64_ABI && FEATURE_SIMD -#if defined(TARGET_ARM64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) bool genInstrWithConstant(instruction ins, emitAttr attr, regNumber reg1, @@ -339,66 +339,6 @@ class CodeGen final : public CodeGenInterface void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); -#elif defined(TARGET_LOONGARCH64) - bool genInstrWithConstant(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - ssize_t imm, - regNumber tmpReg, - bool inUnwindRegion = false); - - void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData); - - void genPrologSaveRegPair(regNumber reg1, - regNumber reg2, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero); - - void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero); - - void genEpilogRestoreRegPair(regNumber reg1, - regNumber reg2, - int spOffset, - int spDelta, - bool useSaveNextPair, - regNumber tmpReg, - bool* pTmpRegIsZero); - - void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, 
bool* pTmpRegIsZero); - - // A simple struct to keep register pairs for prolog and epilog. - struct RegPair - { - regNumber reg1; - regNumber reg2; - bool useSaveNextPair; - - RegPair(regNumber reg1) : reg1(reg1), reg2(REG_NA), useSaveNextPair(false) - { - } - - RegPair(regNumber reg1, regNumber reg2) : reg1(reg1), reg2(reg2), useSaveNextPair(false) - { - assert(reg2 == REG_NEXT(reg1)); - } - }; - - static void genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* regStack); - static void genSetUseSaveNextPairs(ArrayStack* regStack); - - static int genGetSlotSizeForRegsInMask(regMaskTP regsMask); - - void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset); - - void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); - void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); - - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #else void genPushCalleeSavedRegisters(); #endif @@ -1338,11 +1278,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genStoreRegToStackArg(var_types type, regNumber reg, int offset); #endif // FEATURE_PUT_STRUCT_ARG_STK -#ifdef TARGET_LOONGARCH64 - // TODO for LOONGARCH64 : maybe delete on LA64? 
- void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset); -#endif - void genCodeForStoreBlk(GenTreeBlk* storeBlkNode); #ifndef TARGET_X86 void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 062bbd0eb1678..4f5ad64dce390 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -9460,21 +9460,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - // if (validImmForBAL((ssize_t)addrInfo.addr)) - //{ - // // Simple direct call - - // //TODO for LA. - // callType = emitter::EC_FUNC_TOKEN; - // addr = addrInfo.addr; - // indCallReg = REG_NA; - // break; - //} - - //// otherwise the target address doesn't fit in an immediate - //// so we have to burn a register... - //__fallthrough; - + //TODO-LOONGARCH64-CQ: using B/BL for optimization. case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 84d8560545894..f692193104f12 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -112,9 +112,7 @@ class CodeGenInterface private: #if defined(TARGET_XARCH) static const insFlags instInfo[INS_count]; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) - static const BYTE instInfo[INS_count]; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) static const BYTE instInfo[INS_count]; #else #error Unsupported target architecture @@ -312,11 +310,7 @@ class CodeGenInterface bool validImmForAdd(target_ssize_t imm, insFlags flags); bool validImmForAlu(target_ssize_t imm); bool validImmForMov(target_ssize_t imm); -#ifdef TARGET_LOONGARCH64 - bool 
validImmForBAL(ssize_t addr); -#else bool validImmForBL(ssize_t addr); -#endif instruction ins_Load(var_types srcType, bool aligned = false); instruction ins_Store(var_types dstType, bool aligned = false); @@ -387,7 +381,7 @@ class CodeGenInterface bool m_cgInterruptible; #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) bool m_cgHasTailCalls; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // The following will be set to true if we've determined that we need to // generate a full-blown pointer register map for the current method. diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 796c40b465af0..3d05d9299c273 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6996,25 +6996,6 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst } } -// Generate code for a store to some address + offset -// base: tree node which can be either a local address or arbitrary node -// offset: distance from the base from which to load -void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset) -{ - emitter* emit = GetEmitter(); - - if (base->OperIsLocalAddr()) - { - if (base->gtOper == GT_LCL_FLD_ADDR) - offset += base->AsLclFld()->GetLclOffs(); - emit->emitIns_S_R(ins, size, src, base->AsLclVarCommon()->GetLclNum(), offset); - } - else - { - emit->emitIns_R_R_I(ins, size, src, base->GetRegNum(), offset); - } -} - //------------------------------------------------------------------------ // genCallInstruction: Produce code for a GT_CALL node // diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b5020f14de6e8..9c68c153e33b1 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2321,10 +2321,6 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); #endif // TARGET_ARM64 -#if 
defined(TARGET_LOONGARCH64) -// TODO: should add LOONGARCH64's features for LOONGARCH64. -#endif - instructionSetFlags = EnsureInstructionSetFlagsAreValid(instructionSetFlags); opts.setSupportedISAs(instructionSetFlags); @@ -2509,8 +2505,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // For non-adaptive, padding limit is same as specified by the alignment. opts.compJitAlignPaddingLimit = opts.compJitAlignLoopBoundary; } -#elif defined(TARGET_LOONGARCH64) -// TODO: should be adaptive on LoongArch64. #endif assert(isPow2(opts.compJitAlignLoopBoundary)); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5c9e02769ad01..856705bf3ad33 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -469,14 +469,15 @@ class LclVarDsc unsigned char lvIsTemp : 1; // Short-lifetime compiler temp -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) - unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. -#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + +#if defined(TARGET_LOONGARCH64) unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. - unsigned char lvIsSplit : 1; // Set if the argument is splited. also used the lvFldOffset. -#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + unsigned char lvIsSplit : 1; // Set if the argument is splited. 
+#endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def @@ -671,9 +672,6 @@ class LclVarDsc { assert(lvIsHfa()); assert(varTypeIsStruct(lvType)); -#if defined(TARGET_LOONGARCH64) - assert(!"lvHfaSlots called not support on LOONGARCH64!"); -#endif unsigned slots = 0; #ifdef TARGET_ARM slots = lvExactSize / sizeof(float); @@ -8056,14 +8054,9 @@ class Compiler // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes. return ((type == TYP_SIMD16) || (type == TYP_SIMD12)); } -#elif defined(TARGET_LOONGARCH64) - static bool varTypeNeedsPartialCalleeSave(var_types type) - { // TODO: supporting SIMD feature for LoongArch64. - return false; - } -#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #error("Unknown target architecture for FEATURE_SIMD") -#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64) #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE protected: diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 0a19c7cbf138e..97e216dccbb60 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -48,23 +48,9 @@ JMP_SMALL(le , gt , ble ) // LE #elif defined(TARGET_LOONGARCH64) -/* TODO for LOONGARCH: should redesign!!! 
*/ -// jump reverse instruction condcode -JMP_SMALL(jmp , jmp , b ) // AL always +JMP_SMALL(jmp , jmp , b ) JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE -//JMP_SMALL(hs , lo , bgez ) // HS also CS -//JMP_SMALL(lo , hs , bltz ) // LO also CC -//JMP_SMALL(mi , pl , bmi ) // MI -//JMP_SMALL(pl , mi , bpl ) // PL -//JMP_SMALL(vs , vc , bvs ) // VS -//JMP_SMALL(vc , vs , bvc ) // VC -//JMP_SMALL(hi , ls , bhi ) // HI -//JMP_SMALL(ls , hi , bls ) // LS -//JMP_SMALL(gez , ltz , bgez ) // GE -//JMP_SMALL(gtz , lez , bgtz ) // GT -//JMP_SMALL(ltz , gez , bltz ) // LT -//JMP_SMALL(lez , gtz , blez ) // LE #else #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 4c90765f7609f..2bb2e27f4b5ce 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -623,7 +623,7 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) // clang-format off static const char * const RegNames[] = { - #define REGDEF(name, rnum, mask, xname, wname) xname, + #define REGDEF(name, rnum, mask, sname) sname, #include "register.h" }; // clang-format on diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index bbe6b47b517b3..d718ddc00451b 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4444,6 +4444,10 @@ struct GenTreeCall final : public GenTree bool HasMultiRegRetVal() const { #ifdef FEATURE_MULTIREG_RET +#if defined(TARGET_LOONGARCH64) + return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); +#else + #if defined(TARGET_X86) || defined(TARGET_ARM) if (varTypeIsLong(gtType)) { @@ -4451,9 +4455,6 @@ struct GenTreeCall final : public GenTree } #endif -#if defined(TARGET_LOONGARCH64) - return (gtType == TYP_STRUCT) && (gtReturnTypeDesc.GetReturnRegCount() > 1); -#else if (!varTypeIsStruct(gtType) || HasRetBufArg()) { return false; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp 
index 8edf1587348c4..8f27dd6c231d9 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1483,13 +1483,6 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) } #endif // TARGET_ARM64 -#if defined(TARGET_LOONGARCH64) -bool CodeGenInterface::validImmForBAL(ssize_t addr) -{ // TODO: can amend/optimize for LoongArch64. - return false; -} -#endif // TARGET_LOONGARCH64 - /***************************************************************************** * * Get the machine dependent instruction for performing sign/zero extension. diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 2316147f14960..46945ed7eae7f 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -309,7 +309,7 @@ // Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from // the official Arm64 ABI. // Case: splitting 16 byte struct between x7 and stack -#if defined(TARGET_ARM) || defined(TARGET_ARM64) /* || defined(TARGET_LOONGARCH64)*/ +#if defined(TARGET_ARM) || defined(TARGET_ARM64) #define FEATURE_ARG_SPLIT 1 #else #define FEATURE_ARG_SPLIT 0 diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 20fa0a5e10a56..ab262f28c5a64 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1050,7 +1050,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un else if (cSlots > 1) { varDsc->lvIsSplit = 1; - // varDsc->lvFldOffset = 0; varDsc->SetOtherArgReg(REG_STK); varDscInfo->hasMultiSlotStruct = true; varDscInfo->setAllRegArgUsed(arg1_Type); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 7468766b7a052..6dd44bdf9b1a6 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -38,9 +38,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool Lowering::IsCallTargetInRange(void* addr) { - ////TODO for LOONGARCH64: should amend for optimize! 
- // assert(!"unimplemented on LOONGARCH yet"); - // return comp->codeGen->validImmForBAL((ssize_t)addr); + //TODO-LOONGARCH64-CQ: using B/BL for optimization. return false; } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8793b619804b9..f6254ecdb79b3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1011,6 +1011,12 @@ class LinearScan : public LinearScanInterface void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) + // For LoongArch64's ABI, a struct can be passed + // partially using registers from the 2 register files. + void LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc); +#endif + // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b1504e1f73e3d..3379a54e4be08 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2010,6 +2010,43 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) #endif // defined(UNIX_AMD64_ABI) +#ifdef TARGET_LOONGARCH64 +void LinearScan::LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc) +{ + assert(varTypeIsStruct(argDsc)); + RegState* intRegState = &compiler->codeGen->intRegState; + RegState* floatRegState = &compiler->codeGen->floatRegState; + + if ((argDsc->GetArgReg() != REG_STK) && (argDsc->GetArgReg() != REG_NA)) + { + if (genRegMask(argDsc->GetArgReg()) & (RBM_ALLFLOAT)) + { + assert(genRegMask(argDsc->GetArgReg()) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); + } + else + { + assert(genRegMask(argDsc->GetArgReg()) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); + } + } + + if ((argDsc->GetOtherArgReg() != REG_STK) && (argDsc->GetOtherArgReg() != REG_NA)) + { + if (genRegMask(argDsc->GetOtherArgReg()) & (RBM_ALLFLOAT)) + { + 
assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_FLTARG_REGS)); + floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); + } + else + { + assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_ARG_REGS)); + intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); + } + } +} +#endif + //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate // regState (either compiler->intRegState or compiler->floatRegState), @@ -2040,6 +2077,13 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) } else #endif // defined(UNIX_AMD64_ABI) +#if defined(TARGET_LOONGARCH64) + if (varTypeIsStruct(argDsc)) + { + LoongArch64UpdateRegStateForArg(argDsc); + } + else +#endif { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index 5720c4a23e3eb..532fa8fd40976 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -162,18 +162,6 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc #if FEATURE_MULTIREG_ARGS if (varTypeIsStruct(argDsc->lvType)) { -#ifdef TARGET_LOONGARCH64 - { - if (argDsc->GetOtherArgReg() != REG_NA) - { - inArgMask = genRegMask(argDsc->GetOtherArgReg()); - if (emitter::isFloatReg(argDsc->GetOtherArgReg())) - codeGen->floatRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; - else - codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= inArgMask; - } - } -#else if (argDsc->lvIsHfaRegArg()) { assert(regState->rsIsFloat); @@ -198,7 +186,6 @@ regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); } } -#endif } #endif // FEATURE_MULTIREG_ARGS diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 
4127ce8ca4ace..0d8beac0e3aac 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -15,40 +15,39 @@ #define RMASK(x) (1ULL << (x)) /* -REGDEF(name, rnum, mask, xname, wname) */ -REGDEF(R0, 0, 0x0001, "zero" , "zero" ) -REGDEF(RA, 1, 0x0002, "ra" , "ra" ) -REGDEF(TP, 2, 0x0004, "tp" , "tp" ) -REGDEF(SP, 3, 0x0008, "sp" , "sp" ) -REGDEF(A0, 4, 0x0010, "a0" , "a0" ) -REGDEF(A1, 5, 0x0020, "a1" , "a1" ) -REGDEF(A2, 6, 0x0040, "a2" , "a2" ) -REGDEF(A3, 7, 0x0080, "a3" , "a3" ) -REGDEF(A4, 8, 0x0100, "a4" , "a4" ) -REGDEF(A5, 9, 0x0200, "a5" , "a5" ) -REGDEF(A6, 10, 0x0400, "a6" , "a6" ) -REGDEF(A7, 11, 0x0800, "a7" , "a7" ) -REGDEF(T0, 12, 0x1000, "t0" , "t0" ) -REGDEF(T1, 13, 0x2000, "t1" , "t1" ) -REGDEF(T2, 14, 0x4000, "t2" , "t2" ) -REGDEF(T3, 15, 0x8000, "t3" , "t3" ) -REGDEF(T4, 16, 0x10000, "t4" , "t4" ) -REGDEF(T5, 17, 0x20000, "t5" , "t5" ) -REGDEF(T6, 18, 0x40000, "t6" , "t6" ) -REGDEF(T7, 19, 0x80000, "t7" , "t7" ) -REGDEF(T8, 20, 0x100000, "t8" , "t8" ) -REGDEF(X0, 21, 0x200000, "x0" , "x0" ) -REGDEF(FP, 22, 0x400000, "fp" , "fp" ) -REGDEF(S0, 23, 0x800000, "s0" , "s0" ) -REGDEF(S1, 24, 0x1000000, "s1" , "s1" ) -REGDEF(S2, 25, 0x2000000, "s2" , "s2" ) -REGDEF(S3, 26, 0x4000000, "s3" , "s3" ) -REGDEF(S4, 27, 0x8000000, "s4" , "s4" ) -REGDEF(S5, 28, 0x10000000, "s5" , "s5" ) -REGDEF(S6, 29, 0x20000000, "s6" , "s6" ) -REGDEF(S7, 30, 0x40000000, "s7" , "s7" ) -REGDEF(S8, 31, 0x80000000, "s8" , "s8" ) - +REGDEF(name, rnum, mask, sname) */ +REGDEF(R0, 0, 0x0001, "zero") +REGDEF(RA, 1, 0x0002, "ra" ) +REGDEF(TP, 2, 0x0004, "tp" ) +REGDEF(SP, 3, 0x0008, "sp" ) +REGDEF(A0, 4, 0x0010, "a0" ) +REGDEF(A1, 5, 0x0020, "a1" ) +REGDEF(A2, 6, 0x0040, "a2" ) +REGDEF(A3, 7, 0x0080, "a3" ) +REGDEF(A4, 8, 0x0100, "a4" ) +REGDEF(A5, 9, 0x0200, "a5" ) +REGDEF(A6, 10, 0x0400, "a6" ) +REGDEF(A7, 11, 0x0800, "a7" ) +REGDEF(T0, 12, 0x1000, "t0" ) +REGDEF(T1, 13, 0x2000, "t1" ) +REGDEF(T2, 14, 0x4000, "t2" ) +REGDEF(T3, 15, 0x8000, "t3" ) 
+REGDEF(T4, 16, 0x10000, "t4" ) +REGDEF(T5, 17, 0x20000, "t5" ) +REGDEF(T6, 18, 0x40000, "t6" ) +REGDEF(T7, 19, 0x80000, "t7" ) +REGDEF(T8, 20, 0x100000, "t8" ) +REGDEF(X0, 21, 0x200000, "x0" ) +REGDEF(FP, 22, 0x400000, "fp" ) +REGDEF(S0, 23, 0x800000, "s0" ) +REGDEF(S1, 24, 0x1000000, "s1" ) +REGDEF(S2, 25, 0x2000000, "s2" ) +REGDEF(S3, 26, 0x4000000, "s3" ) +REGDEF(S4, 27, 0x8000000, "s4" ) +REGDEF(S5, 28, 0x10000000, "s5" ) +REGDEF(S6, 29, 0x20000000, "s6" ) +REGDEF(S7, 30, 0x40000000, "s7" ) +REGDEF(S8, 31, 0x80000000, "s8" ) REGALIAS(R21, X0) @@ -56,45 +55,45 @@ REGALIAS(R21, X0) #define FMASK(x) (1ULL << (FBASE+(x))) /* -REGDEF(name, rnum, mask, xname, wname) */ -REGDEF(F0, 0+FBASE, FMASK(0), "f0", "f0") -REGDEF(F1, 1+FBASE, FMASK(1), "f1", "f1") -REGDEF(F2, 2+FBASE, FMASK(2), "f2", "f2") -REGDEF(F3, 3+FBASE, FMASK(3), "f3", "f3") -REGDEF(F4, 4+FBASE, FMASK(4), "f4", "f4") -REGDEF(F5, 5+FBASE, FMASK(5), "f5", "f5") -REGDEF(F6, 6+FBASE, FMASK(6), "f6", "f6") -REGDEF(F7, 7+FBASE, FMASK(7), "f7", "f7") -REGDEF(F8, 8+FBASE, FMASK(8), "f8", "f8") -REGDEF(F9, 9+FBASE, FMASK(9), "f9", "f9") -REGDEF(F10, 10+FBASE, FMASK(10), "f10", "f10") -REGDEF(F11, 11+FBASE, FMASK(11), "f11", "f11") -REGDEF(F12, 12+FBASE, FMASK(12), "f12", "f12") -REGDEF(F13, 13+FBASE, FMASK(13), "f13", "f13") -REGDEF(F14, 14+FBASE, FMASK(14), "f14", "f14") -REGDEF(F15, 15+FBASE, FMASK(15), "f15", "f15") -REGDEF(F16, 16+FBASE, FMASK(16), "f16", "f16") -REGDEF(F17, 17+FBASE, FMASK(17), "f17", "f17") -REGDEF(F18, 18+FBASE, FMASK(18), "f18", "f18") -REGDEF(F19, 19+FBASE, FMASK(19), "f19", "f19") -REGDEF(F20, 20+FBASE, FMASK(20), "f20", "f20") -REGDEF(F21, 21+FBASE, FMASK(21), "f21", "f21") -REGDEF(F22, 22+FBASE, FMASK(22), "f22", "f22") -REGDEF(F23, 23+FBASE, FMASK(23), "f23", "f23") -REGDEF(F24, 24+FBASE, FMASK(24), "f24", "f24") -REGDEF(F25, 25+FBASE, FMASK(25), "f25", "f25") -REGDEF(F26, 26+FBASE, FMASK(26), "f26", "f26") -REGDEF(F27, 27+FBASE, FMASK(27), "f27", "f27") -REGDEF(F28, 28+FBASE, 
FMASK(28), "f28", "f28") -REGDEF(F29, 29+FBASE, FMASK(29), "f29", "f29") -REGDEF(F30, 30+FBASE, FMASK(30), "f30", "f30") -REGDEF(F31, 31+FBASE, FMASK(31), "f31", "f31") +REGDEF(name, rnum, mask, sname) */ +REGDEF(F0, 0+FBASE, FMASK(0), "f0") +REGDEF(F1, 1+FBASE, FMASK(1), "f1") +REGDEF(F2, 2+FBASE, FMASK(2), "f2") +REGDEF(F3, 3+FBASE, FMASK(3), "f3") +REGDEF(F4, 4+FBASE, FMASK(4), "f4") +REGDEF(F5, 5+FBASE, FMASK(5), "f5") +REGDEF(F6, 6+FBASE, FMASK(6), "f6") +REGDEF(F7, 7+FBASE, FMASK(7), "f7") +REGDEF(F8, 8+FBASE, FMASK(8), "f8") +REGDEF(F9, 9+FBASE, FMASK(9), "f9") +REGDEF(F10, 10+FBASE, FMASK(10), "f10") +REGDEF(F11, 11+FBASE, FMASK(11), "f11") +REGDEF(F12, 12+FBASE, FMASK(12), "f12") +REGDEF(F13, 13+FBASE, FMASK(13), "f13") +REGDEF(F14, 14+FBASE, FMASK(14), "f14") +REGDEF(F15, 15+FBASE, FMASK(15), "f15") +REGDEF(F16, 16+FBASE, FMASK(16), "f16") +REGDEF(F17, 17+FBASE, FMASK(17), "f17") +REGDEF(F18, 18+FBASE, FMASK(18), "f18") +REGDEF(F19, 19+FBASE, FMASK(19), "f19") +REGDEF(F20, 20+FBASE, FMASK(20), "f20") +REGDEF(F21, 21+FBASE, FMASK(21), "f21") +REGDEF(F22, 22+FBASE, FMASK(22), "f22") +REGDEF(F23, 23+FBASE, FMASK(23), "f23") +REGDEF(F24, 24+FBASE, FMASK(24), "f24") +REGDEF(F25, 25+FBASE, FMASK(25), "f25") +REGDEF(F26, 26+FBASE, FMASK(26), "f26") +REGDEF(F27, 27+FBASE, FMASK(27), "f27") +REGDEF(F28, 28+FBASE, FMASK(28), "f28") +REGDEF(F29, 29+FBASE, FMASK(29), "f29") +REGDEF(F30, 30+FBASE, FMASK(30), "f30") +REGDEF(F31, 31+FBASE, FMASK(31), "f31") // The registers with values 64 (NBASE) and above are not real register numbers #define NBASE 64 // This must be last! 
-REGDEF(STK, 0+NBASE, 0x0000, "STK", "STK") +REGDEF(STK, 0+NBASE, 0x0000, "STK") /*****************************************************************************/ #undef RMASK diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index aade930da4fd5..d28a90ec36f5d 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -23,7 +23,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) const regMaskSmall regMasks[] = { #define REGDEF(name, rnum, mask, xname, wname) mask, #include "register.h" @@ -228,11 +228,9 @@ RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_r rsMaskResvd = RBM_NONE; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) rsMaskCalleeSaved = RBM_NONE; -#elif defined(TARGET_LOONGARCH64) - rsMaskCalleeSaved = RBM_NONE; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_ARM rsMaskPreSpillRegArg = RBM_NONE; diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index f2bc7875152b6..a816c0d607757 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,11 +123,9 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#elif defined(TARGET_LOONGARCH64) - regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARM +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty) diff --git 
a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 97d9a69328483..536ef627d6062 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -181,7 +181,7 @@ enum _regMask_enum : unsigned enum _regNumber_enum : unsigned { -#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum, +#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, #define REGALIAS(alias, realname) REG_##alias = REG_##realname, #include "register.h" @@ -193,7 +193,7 @@ enum _regNumber_enum : unsigned enum _regMask_enum : unsigned __int64 { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, +#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, #define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, #include "register.h" }; @@ -702,7 +702,7 @@ inline bool isFloatRegType(var_types type) C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE); -#if ETW_EBP_FRAMED && !defined(TARGET_LOONGARCH64) +#if ETW_EBP_FRAMED // Frame pointer isn't either if we're supporting ETW frame chaining C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE); C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 2c6153f6579fd..2bfaea897abef 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -8,11 +8,9 @@ // clang-format off #define CPU_LOAD_STORE_ARCH 1 - //#define CPU_LONG_USES_REGPAIR 0 #define CPU_HAS_FP_SUPPORT 1 #define ROUND_FLOAT 0 // Do not round intermed float expression results #define CPU_HAS_BYTE_REGS 0 - //#define CPU_USES_BLOCK_MOVE 0 #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk. #define INITBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll InitBlk. 
@@ -71,7 +69,6 @@ #define RBM_INT_CALLEE_SAVED (RBM_S0|RBM_S1|RBM_S2|RBM_S3|RBM_S4|RBM_S5|RBM_S6|RBM_S7|RBM_S8) #define RBM_INT_CALLEE_TRASH (RBM_A0|RBM_A1|RBM_A2|RBM_A3|RBM_A4|RBM_A5|RBM_A6|RBM_A7|RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T5|RBM_T6|RBM_T7|RBM_T8) #define RBM_FLT_CALLEE_SAVED (RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31) - //#define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F12|RBM_F13|RBM_F14|RBM_F15|RBM_F16|RBM_F17|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23) #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7) #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED) @@ -120,7 +117,6 @@ // register to hold shift amount; no special register is required on LOONGARCH64. #define REG_SHIFT REG_NA #define RBM_SHIFT RBM_ALLINT - //#define PREDICT_REG_SHIFT PREDICT_REG // This is a general scratch register that does not conflict with the argument registers #define REG_SCRATCH REG_T0 @@ -142,27 +138,23 @@ // LOONGARCH64 write barrier ABI (see vm/loongarch64/asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: - // v0: the destination address (LHS of the assignment) - // v1: the object reference (RHS of the assignment) + // t6: the destination address (LHS of the assignment) + // t7: the object reference (RHS of the assignment) // On exit: // t0: trashed // t1: trashed - // t2: trashed // t3: trashed - // v0: incremented by 8 - // v1: trashed - // ??: trashed if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP (currently non-Windows) + // t4: trashed + // t6: incremented by 8 + // t7: trashed // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): // On entry: // t8: the source address (points to object reference to write) - // v0: the destination address (object reference written here) + // t6: the destination address (object reference written here) // 
On exit: // t8: incremented by 8 - // v0: incremented by 8 + // t6: incremented by 8 // - // Note that while ?reg? is currently only trashed under FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP, - // currently only set for non-Windows//, it is expected to be set in the future for Windows, and for R2R. - // So simply always consider it trashed, to avoid later breaking changes. #define REG_WRITE_BARRIER_DST REG_T6 #define RBM_WRITE_BARRIER_DST RBM_T6 @@ -176,7 +168,7 @@ #define REG_WRITE_BARRIER_SRC_BYREF REG_T8 #define RBM_WRITE_BARRIER_SRC_BYREF RBM_T8 - #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T2|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) + #define RBM_CALLEE_TRASH_NOGC (RBM_T0|RBM_T1|RBM_T3|RBM_T4|RBM_T6|RBM_T7|RBM_DEFAULT_HELPER_CALL_TARGET) // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_WRITE_BARRIER_DST|RBM_CALLEE_TRASH_NOGC) @@ -209,7 +201,7 @@ #define REG_INDIRECT_CALL_TARGET_REG REG_T6 - // Registers used by PInvoke frame setup //should confirm. 
+ // Registers used by PInvoke frame setup #define REG_PINVOKE_FRAME REG_T0 #define RBM_PINVOKE_FRAME RBM_T0 #define REG_PINVOKE_TCB REG_T1 @@ -270,7 +262,7 @@ #define RBM_FPBASE RBM_FP #define STR_FPBASE "fp" #define REG_SPBASE REG_SP - #define RBM_SPBASE RBM_SP // reuse the RBM for REG_SP + #define RBM_SPBASE RBM_SP #define STR_SPBASE "sp" #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address diff --git a/src/coreclr/jit/unwindloongarch64.cpp b/src/coreclr/jit/unwindloongarch64.cpp index 110c88985a510..faae126aa5718 100644 --- a/src/coreclr/jit/unwindloongarch64.cpp +++ b/src/coreclr/jit/unwindloongarch64.cpp @@ -232,7 +232,7 @@ void Compiler::unwindPush(regNumber reg) void Compiler::unwindAllocStack(unsigned size) { -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { if (compGeneratingProlog) @@ -242,7 +242,7 @@ void Compiler::unwindAllocStack(unsigned size) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -275,7 +275,7 @@ void Compiler::unwindAllocStack(unsigned size) void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) { -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { if (compGeneratingProlog) @@ -285,7 +285,7 @@ void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT UnwindInfo* pu = &funCurrentFunc()->uwi; @@ -343,7 +343,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) assert(0 <= offset && offset <= 2047); assert((offset % 8) == 0); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { if (compGeneratingProlog) @@ -356,7 +356,7 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT int z = offset / 8; // assert(0 <= z && z <= 0xFF); @@ -390,63 +390,7 @@ void 
Compiler::unwindSaveReg(regNumber reg, int offset) void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) { - // TODO:temp not used on loongarch64. - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - UnwindInfo* pu = &funCurrentFunc()->uwi; - - // stp reg1, reg2, [sp, #offset] - - // offset for store pair in prolog must be positive and a multiple of 16. - assert(0 <= offset && offset <= 0xff0); - assert((offset % 16) == 0); - - int z = offset / 8; - //assert(0 <= z && z <= 0x1FE); - -#if defined(TARGET_UNIX) - if (generateCFIUnwindCodes()) - { - if (compGeneratingProlog) - { - FuncInfoDsc* func = funCurrentFunc(); - UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); - - createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg1), offset); - createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg2), offset + 8); - } - - return; - } -#endif // TARGET_UNIX - if (reg1 == REG_FP) - { - // save_fpra: 0100zzzz | zzzzzzzz: save pair at [sp+#Z*8], offset <= 0xff0 - assert(reg2 == REG_RA); - - pu->AddCode(0x40 | (BYTE)(z >> 8), (BYTE)z); - } - else if (reg2 == REG_RA) - { - assert(!"unimplemented on LOONGARCH yet"); - } - else if (emitter::isGeneralRegister(reg1)) - { - // save_regp: 11001000 | 0xxxzzzz | zzzzzzzz: save s(0 + #X) pair at [sp + #Z * 8], offset <= 4080 - assert(REG_NEXT(reg1) == reg2); - assert(REG_S0 <= reg1 && // first legal pair: S0, S1 - reg1 <= REG_S6); // last legal pair: S6, S7 (FP is never saved without RA) - - BYTE x = (BYTE)(reg1 - REG_S0); - //assert(0 <= x && x <= 0x6); - - pu->AddCode(0xC8, (BYTE)(x << 4) | (BYTE)(z >> 8), (BYTE)z); - } - else - { - assert(!"unimplemented on LOONGARCH yet"); - } -#endif + assert(!"unused on LOONGARCH64 yet"); } void Compiler::unwindReturn(regNumber reg) @@ -948,13 +892,13 @@ void Compiler::unwindBegProlog() { assert(compGeneratingProlog); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { unwindBegPrologCFI(); return; } 
-#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT FuncInfoDsc* func = funCurrentFunc(); @@ -980,12 +924,12 @@ void Compiler::unwindBegEpilog() { assert(compGeneratingEpilog); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT funCurrentFunc()->uwi.AddEpilog(); } @@ -1000,12 +944,12 @@ void Compiler::unwindEndEpilog() // for them. void Compiler::unwindPadding() { -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT UnwindInfo* pu = &funCurrentFunc()->uwi; GetEmitter()->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this); @@ -1030,7 +974,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func) BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE; bool funcHasColdSection = false; -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { DWORD unwindCodeBytes = 0; @@ -1043,7 +987,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func) return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT // If there is cold code, split the unwind data between the hot section and the // cold section. 
This needs to be done before we split into fragments, as each @@ -1103,13 +1047,13 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); -#if defined(TARGET_UNIX) +#if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { unwindEmitFuncCFI(func, pHotCode, pColdCode); return; } -#endif // TARGET_UNIX +#endif // FEATURE_CFI_SUPPORT func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true); diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 078de7524a3a4..9f3c50279fe5a 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -132,8 +132,6 @@ const char* getRegName(regNumber reg) static const char* const regNames[] = { #if defined(TARGET_ARM64) #define REGDEF(name, rnum, mask, xname, wname) xname, -#elif defined(TARGET_LOONGARCH64) -#define REGDEF(name, rnum, mask, xname, wname) xname, #else #define REGDEF(name, rnum, mask, sname) sname, #endif @@ -219,7 +217,7 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regName; } -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, xname, wname) xname, @@ -229,6 +227,17 @@ const char* getRegNameFloat(regNumber reg, var_types type) return regNamesFloat[reg]; +#elif defined(TARGET_LOONGARCH64) + + static const char* regNamesFloat[] = { +#define REGDEF(name, rnum, mask, sname) sname, +#include "register.h" + }; + + assert((unsigned)reg < ArrLen(regNamesFloat)); + + return regNamesFloat[reg]; + #else static const char* regNamesFloat[] = { #define REGDEF(name, rnum, mask, sname) "x" sname, @@ -320,7 +329,7 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) // No register ranges #elif defined(TARGET_LOONGARCH64) - if (REG_A0 <= regNum && regNum <= REG_X0) + if (REG_A0 <= regNum && 
regNum <= REG_T8) { regHead = regNum; inRegRange = true; @@ -336,7 +345,7 @@ void dspRegMask(regMaskTP regMask, size_t minSiz) else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB || (regNum == REG_R28)) // last register before FP #elif defined(TARGET_LOONGARCH64) - else if ((regNum == REG_INT_LAST) || (regNum == REG_X0)) + else if ((regNum == REG_INT_LAST) || (regNum == REG_A7) || (regNum == REG_T8)) #else // TARGET_LOONGARCH64 // We've already printed a register. Is this the end of a range? else if (regNum == REG_INT_LAST) diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 0132d894c388f..52842c4375c21 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -49,16 +49,14 @@ struct FloatTraits // Notes: // "Default" NaN value returned by expression 0.0f / 0.0f on x86/x64 has // different binary representation (0xffc00000) than NaN on - // ARM32/ARM64 (0x7fc00000). + // ARM32/ARM64/LoongArch64 (0x7fc00000). static float NaN() { #if defined(TARGET_XARCH) unsigned bits = 0xFFC00000u; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned bits = 0x7FC00000u; -#elif defined(TARGET_LOONGARCH64) - unsigned bits = 0xFFC00000u; #else #error Unsupported or unset target architecture #endif @@ -77,16 +75,14 @@ struct DoubleTraits // Notes: // "Default" NaN value returned by expression 0.0 / 0.0 on x86/x64 has // different binary representation (0xfff8000000000000) than NaN on - // ARM32/ARM64 (0x7ff8000000000000). + // ARM32/ARM64/LoongArch64 (0x7ff8000000000000). 
static double NaN() { #if defined(TARGET_XARCH) unsigned long long bits = 0xFFF8000000000000ull; -#elif defined(TARGET_ARMARCH) +#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) unsigned long long bits = 0x7FF8000000000000ull; -#elif defined(TARGET_LOONGARCH64) - unsigned long long bits = 0xFFF8000000000000ull; #else #error Unsupported or unset target architecture #endif @@ -2850,6 +2846,9 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu else { #ifdef TARGET_LOONGARCH64 + // For LoongArch64, the int32 will signed-extend default, + // e.g. `ld_w $r4, $r5, 4` loading a int32 from the addr `$r5+4`. + // So there is no need to signed-extend. assert(typ == TYP_INT || typ == TYP_LONG); #else assert(typ == TYP_INT); diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index aac563b265e36..872c5e376aed5 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -182,7 +182,7 @@ ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of t // No Hardware Intrinsics on ARM32 #elif defined (TARGET_LOONGARCH64) - //TODO: add LoongArch64's Hardware Instructions. + //TODO-LOONGARCH64-CQ: add LoongArch64's Hardware Intrinsics Instructions if supported. #else #error Unsupported platform From 377c2fd67acec6709d959ebdf1e99341c7e79eee Mon Sep 17 00:00:00 2001 From: Qiao Pengcheng Date: Wed, 16 Feb 2022 20:37:01 +0800 Subject: [PATCH 21/46] [LoongArch64] apply the `format.patch`. 
--- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/compiler.h | 10 +++++----- src/coreclr/jit/lowerloongarch64.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/regset.h | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 4f5ad64dce390..a8da6bf3c7308 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -9460,7 +9460,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - //TODO-LOONGARCH64-CQ: using B/BL for optimization. + // TODO-LOONGARCH64-CQ: using B/BL for optimization. case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 856705bf3ad33..09ee3a6da4ec0 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -469,15 +469,15 @@ class LclVarDsc unsigned char lvIsTemp : 1; // Short-lifetime compiler temp -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) unsigned char lvIsImplicitByRef : 1; // Set if the argument is an implicit byref. #endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) #if defined(TARGET_LOONGARCH64) - unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. - unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. - unsigned char lvIsSplit : 1; // Set if the argument is splited. -#endif // defined(TARGET_LOONGARCH64) + unsigned char lvIs4Field1 : 1; // Set if the 1st field is int or float within struct for LA-ABI64. 
+ unsigned char lvIs4Field2 : 1; // Set if the 2nd field is int or float within struct for LA-ABI64. + unsigned char lvIsSplit : 1; // Set if the argument is splited. +#endif // defined(TARGET_LOONGARCH64) unsigned char lvIsBoolean : 1; // set if variable is boolean unsigned char lvSingleDef : 1; // variable has a single def diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 6dd44bdf9b1a6..4196b23578f61 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -38,7 +38,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool Lowering::IsCallTargetInRange(void* addr) { - //TODO-LOONGARCH64-CQ: using B/BL for optimization. + // TODO-LOONGARCH64-CQ: using B/BL for optimization. return false; } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3379a54e4be08..90473a99ea003 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2078,7 +2078,7 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) else #endif // defined(UNIX_AMD64_ABI) #if defined(TARGET_LOONGARCH64) - if (varTypeIsStruct(argDsc)) + if (varTypeIsStruct(argDsc)) { LoongArch64UpdateRegStateForArg(argDsc); } diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index a816c0d607757..9c1a1041eecf8 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -123,9 +123,9 @@ class RegSet private: regMaskTP _rsMaskVars; // backing store for rsMaskVars property -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 public: // TODO-Cleanup: Should be private, but Compiler uses it regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes 
(typically empty) From f3f9636693ad95341de049776603f27c00a5b4bb Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Feb 2022 15:18:43 +0800 Subject: [PATCH 22/46] [LoongArch64] round 2 amend for reviewing by @BruceForstall. --- src/coreclr/jit/codegencommon.cpp | 453 +-------------- src/coreclr/jit/codegenloongarch64.cpp | 444 +++++++++++++- src/coreclr/jit/emitloongarch64.cpp | 60 +- src/coreclr/jit/gentree.cpp | 51 +- src/coreclr/jit/importer.cpp | 14 +- src/coreclr/jit/instr.cpp | 20 +- src/coreclr/jit/instr.h | 18 +- src/coreclr/jit/instrsloongarch64.h | 774 ++++++++++++------------- src/coreclr/jit/lsra.h | 11 +- src/coreclr/jit/lsrabuild.cpp | 64 +- src/coreclr/jit/morph.cpp | 33 +- src/coreclr/jit/optimizer.cpp | 10 + 12 files changed, 980 insertions(+), 972 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index a8da6bf3c7308..38c98960fcf92 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -140,7 +140,6 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) #endif // TARGET_ARM64 #ifdef TARGET_LOONGARCH64 - SetHasTailCalls(false); genSaveFpRaWithAllCalleeSavedRegisters = false; #endif // TARGET_LOONGARCH64 } @@ -1757,72 +1756,6 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } #endif // TARGET_ARMARCH -#ifdef TARGET_LOONGARCH64 -//------------------------------------------------------------------------ -// genEmitGSCookieCheck: Generate code to check that the GS cookie -// wasn't thrashed by a buffer overrun. -// -void CodeGen::genEmitGSCookieCheck(bool pushReg) -{ - noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); - - // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while - // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0). 
- if (!pushReg && (compiler->info.compRetType == TYP_REF)) - gcInfo.gcRegGCrefSetCur |= RBM_INTRET; - - // We need two temporary registers, to load the GS cookie values and compare them. We can't use - // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be - // callee-trash registers, which should not contain anything interesting at this point. - // We don't have any IR node representing this check, so LSRA can't communicate registers - // for us to use. - - regNumber regGSConst = REG_GSCOOKIE_TMP_0; - regNumber regGSValue = REG_GSCOOKIE_TMP_1; - - if (compiler->gsGlobalSecurityCookieAddr == nullptr) - { - // load the GS cookie constant into a reg - // - genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); - } - else - { - //// Ngen case - GS cookie constant needs to be accessed through an indirection. - // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); - if (compiler->opts.compReloc) - { - GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, - (ssize_t)compiler->gsGlobalSecurityCookieAddr); - } - else - { ////TODO:LoongArch64 should amend for optimize! 
- // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, - // (ssize_t)compiler->gsGlobalSecurityCookieAddr); - // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, - ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); - GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, - (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); - GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, - ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); - } - regSet.verifyRegUsed(regGSConst); - } - // Load this method's GS value from the stack frame - GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); - - // Compare with the GC cookie constant - BasicBlock* gsCheckBlk = genCreateTempLabel(); - GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); - - // regGSConst and regGSValue aren't needed anymore, we can use them for helper call - genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); - genDefineTempLabel(gsCheckBlk); -} -#endif // TARGET_LOONGARCH64 - /***************************************************************************** * * Generate an exit sequence for a return from a method (note: when compiling @@ -3317,7 +3250,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function #endif -#if defined(TARGET_LOONGARCH64) +#ifdef TARGET_LOONGARCH64 void CodeGen::genFnPrologCalleeRegArgs() { assert(!(intRegState.rsCalleeRegArgMaskLiveIn & floatRegState.rsCalleeRegArgMaskLiveIn)); @@ -3538,7 +3471,7 @@ void CodeGen::genFnPrologCalleeRegArgs() baseOffset = 8; base += 8; - GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size /*EA_PTRSIZE*/, REG_SCRATCH, REG_SPBASE, + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, 
REG_SPBASE, genTotalFrameSize()); if ((-2048 <= base) && (base < 2048)) { @@ -3608,7 +3541,7 @@ void CodeGen::genFnPrologCalleeRegArgs() assert(!regArgMaskLive); } -#else //! defined(TARGET_LOONGARCH64) +#else void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState) { #ifdef DEBUG @@ -4979,7 +4912,6 @@ void CodeGen::genEnregisterIncomingStackArgs() #ifdef TARGET_LOONGARCH64 { bool FPbased; - // int baseOffset = (regArgTab[argNum].slot - 1) * slotSize; int base = compiler->lvaFrameAddress(varNum, &FPbased); if ((-2048 <= base) && (base < 2048)) @@ -5953,214 +5885,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) noway_assert(compiler->compCalleeRegsPushed == popCount); } -#elif defined(TARGET_LOONGARCH64) -void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) -{ - assert(compiler->compGeneratingEpilog); - - regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; - - if (isFramePointerUsed()) - { - rsRestoreRegs |= RBM_FPBASE; - } - - rsRestoreRegs |= RBM_RA; // We must save/restore the return address. - - regMaskTP regsToRestoreMask = rsRestoreRegs; - - int totalFrameSize = genTotalFrameSize(); - - int calleeSaveSPOffset = 0; // This will be the starting place for restoring - // the callee-saved registers, in decreasing order. - int frameType = 0; // An indicator of what type of frame we are popping. - int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. - - if (isFramePointerUsed()) - { - if (totalFrameSize <= 2047) - { - if (compiler->compLocallocUsed) - { - int SPtoFPdelta = genSPtoFPdelta(); - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); - compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); - } - - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); - - frameType = 1; - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - - calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; - } - else - { - frameType = 2; - - calleeSaveSPOffset = compiler->compLclFrameSize; - - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); - } - // calleeSaveSPDelta = 0; - } - else - { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " - "localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - frameType = 3; - - int outSzAligned; - if (compiler->lvaOutgoingArgSpaceSize >= 2040) - { - int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - offset; - - int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); - offset2 = calleeSaveSPDelta - offset2; - - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } - else - { - outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - // if (outSzAligned > 0) - { - genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); - } - } - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
- - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); - - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); - } - else - { - int offset2 = compiler->lvaOutgoingArgSpaceSize; - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); - - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - - genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, - /* reportUnwindData */ true); - } - } - else - { - frameType = 4; - - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - - if (compiler->compLocallocUsed) - { - calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; - - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); - compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); - } - else - { - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); - } - } - } - else - { - // No frame pointer (no chaining). - NYI("Frame without frame pointer"); - calleeSaveSPOffset = 0; - } - - JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); - - if (frameType == 1) - { - calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); - compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); - compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); - - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - compiler->unwindAllocStack(totalFrameSize); - } - else if (frameType == 2) - { - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - compiler->unwindAllocStack(totalFrameSize); - } - else if 
(frameType == 3) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - else if (frameType == 4) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - else - { - unreached(); - } -} - #endif // TARGET* // We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. @@ -9374,176 +9098,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) } #elif defined(TARGET_LOONGARCH64) - -void CodeGen::genFnEpilog(BasicBlock* block) -{ -#ifdef DEBUG - if (verbose) - printf("*************** In genFnEpilog()\n"); -#endif // DEBUG - - ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); - - VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); - gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; - gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; - -#ifdef DEBUG - if (compiler->opts.dspCode) - printf("\n__epilog:\n"); - - if (verbose) - { - printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); - dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); - printf(", gcRegGCrefSetCur="); - printRegMaskInt(gcInfo.gcRegGCrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); - printf(", gcRegByrefSetCur="); - printRegMaskInt(gcInfo.gcRegByrefSetCur); - GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); - printf("\n"); - } -#endif // DEBUG - - bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); - - GenTree* lastNode = block->lastNode(); - - // Method handle and address info used in case of jump epilog - CORINFO_METHOD_HANDLE methHnd = nullptr; - CORINFO_CONST_LOOKUP addrInfo; - addrInfo.addr = nullptr; - addrInfo.accessType = IAT_VALUE; - - if (jmpEpilog && lastNode->gtOper == GT_JMP) - { - methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; - compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); - } - - 
compiler->unwindBegEpilog(); - - if (jmpEpilog) - { - SetHasTailCalls(true); - - noway_assert(block->bbJumpKind == BBJ_RETURN); - noway_assert(block->GetFirstLIRNode() != nullptr); - - /* figure out what jump we have */ - GenTree* jmpNode = lastNode; -#if !FEATURE_FASTTAILCALL - noway_assert(jmpNode->gtOper == GT_JMP); -#else // FEATURE_FASTTAILCALL - // armarch - // If jmpNode is GT_JMP then gtNext must be null. - // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. - noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); - - // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp - noway_assert((jmpNode->gtOper == GT_JMP) || - ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); - - // The next block is associated with this "if" stmt - if (jmpNode->gtOper == GT_JMP) -#endif // FEATURE_FASTTAILCALL - { - // Simply emit a jump to the methodHnd. This is similar to a call so we can use - // the same descriptor with some minor adjustments. - assert(methHnd != nullptr); - assert(addrInfo.addr != nullptr); - - emitter::EmitCallType callType; - void* addr; - regNumber indCallReg; - switch (addrInfo.accessType) - { - case IAT_VALUE: - // TODO-LOONGARCH64-CQ: using B/BL for optimization. 
- case IAT_PVALUE: - // Load the address into a register, load indirect and call through a register - // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use - callType = emitter::EC_INDIR_R; - indCallReg = REG_INDIRECT_CALL_TARGET_REG; - addr = NULL; - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); - if (addrInfo.accessType == IAT_PVALUE) - { - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); - regSet.verifyRegUsed(indCallReg); - } - break; - - case IAT_RELPVALUE: - { - // Load the address into a register, load relative indirect and call through a register - // We have to use R12 since we assume the argument registers are in use - // LR is used as helper register right before it is restored from stack, thus, - // all relative address calculations are performed before LR is restored. - callType = emitter::EC_INDIR_R; - indCallReg = REG_T2; - addr = NULL; - - regSet.verifyRegUsed(indCallReg); - break; - } - - case IAT_PPVALUE: - default: - NO_WAY("Unsupported JMP indirection"); - } - - /* Simply emit a jump to the methodHnd. This is similar to a call so we can use - * the same descriptor with some minor adjustments. - */ - - genPopCalleeSavedRegisters(true); - - // clang-format off - GetEmitter()->emitIns_Call(callType, - methHnd, - INDEBUG_LDISASM_COMMA(nullptr) - addr, - 0, // argSize - EA_UNKNOWN // retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - DebugInfo(), - indCallReg, // ireg - REG_NA, // xreg - 0, // xmul - 0, // disp - true); // isJump - // clang-format on - CLANG_FORMAT_COMMENT_ANCHOR; - } -#if FEATURE_FASTTAILCALL - else - { - genPopCalleeSavedRegisters(true); - // Fast tail call. 
- // Call target = REG_FASTTAILCALL_TARGET - // https://github.com/dotnet/coreclr/issues/4827 - // Do we need a special encoding for stack walker like rex.w prefix for x64? - - // TODO for LA: whether the relative address is enough for optimize? - GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); - } -#endif // FEATURE_FASTTAILCALL - } - else - { - genPopCalleeSavedRegisters(false); - - GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); - compiler->unwindReturn(REG_RA); - } - - compiler->unwindEndEpilog(); -} +// see the codegenloongarch64.cpp #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 3d05d9299c273..69e3886056dba 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1426,6 +1426,176 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() #endif // DEBUG } +void CodeGen::genFnEpilog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + printf("*************** In genFnEpilog()\n"); +#endif // DEBUG + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, GetEmitter()->emitInitGCrefVars); + gcInfo.gcRegGCrefSetCur = GetEmitter()->emitInitGCrefRegs; + gcInfo.gcRegByrefSetCur = GetEmitter()->emitInitByrefRegs; + +#ifdef DEBUG + if (compiler->opts.dspCode) + printf("\n__epilog:\n"); + + if (verbose) + { + printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur)); + dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur); + printf(", gcRegGCrefSetCur="); + printRegMaskInt(gcInfo.gcRegGCrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur); + printf(", gcRegByrefSetCur="); + printRegMaskInt(gcInfo.gcRegByrefSetCur); + GetEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur); + printf("\n"); + } +#endif // DEBUG + + bool jmpEpilog 
= ((block->bbFlags & BBF_HAS_JMP) != 0); + + GenTree* lastNode = block->lastNode(); + + // Method handle and address info used in case of jump epilog + CORINFO_METHOD_HANDLE methHnd = nullptr; + CORINFO_CONST_LOOKUP addrInfo; + addrInfo.addr = nullptr; + addrInfo.accessType = IAT_VALUE; + + if (jmpEpilog && lastNode->gtOper == GT_JMP) + { + methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); + } + + compiler->unwindBegEpilog(); + + if (jmpEpilog) + { + SetHasTailCalls(true); + + noway_assert(block->bbJumpKind == BBJ_RETURN); + noway_assert(block->GetFirstLIRNode() != nullptr); + + /* figure out what jump we have */ + GenTree* jmpNode = lastNode; +#if !FEATURE_FASTTAILCALL + noway_assert(jmpNode->gtOper == GT_JMP); +#else // FEATURE_FASTTAILCALL + // armarch + // If jmpNode is GT_JMP then gtNext must be null. + // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts. + noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr)); + + // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp + noway_assert((jmpNode->gtOper == GT_JMP) || + ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall())); + + // The next block is associated with this "if" stmt + if (jmpNode->gtOper == GT_JMP) +#endif // FEATURE_FASTTAILCALL + { + // Simply emit a jump to the methodHnd. This is similar to a call so we can use + // the same descriptor with some minor adjustments. + assert(methHnd != nullptr); + assert(addrInfo.addr != nullptr); + + emitter::EmitCallType callType; + void* addr; + regNumber indCallReg; + switch (addrInfo.accessType) + { + case IAT_VALUE: + //TODO-LOONGARCH64-CQ: using B/BL for optimization. 
+ case IAT_PVALUE: + // Load the address into a register, load indirect and call through a register + // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use + callType = emitter::EC_INDIR_R; + indCallReg = REG_INDIRECT_CALL_TARGET_REG; + addr = NULL; + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr); + if (addrInfo.accessType == IAT_PVALUE) + { + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, indCallReg, indCallReg, 0); + regSet.verifyRegUsed(indCallReg); + } + break; + + case IAT_RELPVALUE: + { + // Load the address into a register, load relative indirect and call through a register + // We have to use R12 since we assume the argument registers are in use + // LR is used as helper register right before it is restored from stack, thus, + // all relative address calculations are performed before LR is restored. + callType = emitter::EC_INDIR_R; + indCallReg = REG_T2; + addr = NULL; + + regSet.verifyRegUsed(indCallReg); + break; + } + + case IAT_PPVALUE: + default: + NO_WAY("Unsupported JMP indirection"); + } + + /* Simply emit a jump to the methodHnd. This is similar to a call so we can use + * the same descriptor with some minor adjustments. + */ + + genPopCalleeSavedRegisters(true); + + // clang-format off + GetEmitter()->emitIns_Call(callType, + methHnd, + INDEBUG_LDISASM_COMMA(nullptr) + addr, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + DebugInfo(), + indCallReg, // ireg + REG_NA, // xreg + 0, // xmul + 0, // disp + true); // isJump + // clang-format on + CLANG_FORMAT_COMMENT_ANCHOR; + } +#if FEATURE_FASTTAILCALL + else + { + genPopCalleeSavedRegisters(true); + // Fast tail call. 
+ // Call target = REG_FASTTAILCALL_TARGET + // https://github.com/dotnet/coreclr/issues/4827 + // Do we need a special encoding for stack walker like rex.w prefix for x64? + + // TODO for LA: whether the relative address is enough for optimize? + GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); + } +#endif // FEATURE_FASTTAILCALL + } + else + { + genPopCalleeSavedRegisters(false); + + GetEmitter()->emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_RA, 0); + compiler->unwindReturn(REG_RA); + } + + compiler->unwindEndEpilog(); +} + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -5633,6 +5803,70 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) *pInitRegZeroed = false; } +//------------------------------------------------------------------------ +// genEmitGSCookieCheck: Generate code to check that the GS cookie +// wasn't thrashed by a buffer overrun. +// +void CodeGen::genEmitGSCookieCheck(bool pushReg) +{ + noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); + + // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while + // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). + if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + + // We need two temporary registers, to load the GS cookie values and compare them. We can't use + // any argument registers if 'pushReg' is true (meaning we have a JMP call). They should be + // callee-trash registers, which should not contain anything interesting at this point. + // We don't have any IR node representing this check, so LSRA can't communicate registers + // for us to use. 
+ + regNumber regGSConst = REG_GSCOOKIE_TMP_0; + regNumber regGSValue = REG_GSCOOKIE_TMP_1; + + if (compiler->gsGlobalSecurityCookieAddr == nullptr) + { + // load the GS cookie constant into a reg + // + genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); + } + else + { + //// Ngen case - GS cookie constant needs to be accessed through an indirection. + // instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, regGSConst, regGSConst, 0); + if (compiler->opts.compReloc) + { + GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr); + } + else + { ////TODO:LoongArch64 should amend for optimize! + // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, + // (ssize_t)compiler->gsGlobalSecurityCookieAddr); + // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); + GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfffff000) >> 12); + GetEmitter()->emitIns_R_I(INS_lu32i_d, EA_PTRSIZE, regGSConst, + (ssize_t)compiler->gsGlobalSecurityCookieAddr >> 32); + GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, + ((ssize_t)compiler->gsGlobalSecurityCookieAddr & 0xfff) >> 2); + } + regSet.verifyRegUsed(regGSConst); + } + // Load this method's GS value from the stack frame + GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); + + // Compare with the GC cookie constant + BasicBlock* gsCheckBlk = genCreateTempLabel(); + GetEmitter()->emitIns_J_cond_la(INS_beq, gsCheckBlk, regGSConst, regGSValue); + + // regGSConst and regGSValue aren't needed anymore, we can use them for helper call + genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst); + genDefineTempLabel(gsCheckBlk); +} + 
//--------------------------------------------------------------------- // genIntrinsic - generate code for a given intrinsic // @@ -8421,7 +8655,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC /*----------------------------------------------------------------------------- * - * Push any callee-saved registers we have used + * Push/Pop any callee-saved registers we have used */ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) @@ -8817,6 +9051,213 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe } } +void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) +{ + assert(compiler->compGeneratingEpilog); + + regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; + + if (isFramePointerUsed()) + { + rsRestoreRegs |= RBM_FPBASE; + } + + rsRestoreRegs |= RBM_RA; // We must save/restore the return address. + + regMaskTP regsToRestoreMask = rsRestoreRegs; + + int totalFrameSize = genTotalFrameSize(); + + int calleeSaveSPOffset = 0; // This will be the starting place for restoring + // the callee-saved registers, in decreasing order. + int frameType = 0; // An indicator of what type of frame we are popping. + int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored. + + if (isFramePointerUsed()) + { + if (totalFrameSize <= 2047) + { + if (compiler->compLocallocUsed) + { + int SPtoFPdelta = genSPtoFPdelta(); + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -SPtoFPdelta); + compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); + } + + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); + + frameType = 1; + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; + } + else + { + frameType = 2; + + calleeSaveSPOffset = compiler->compLclFrameSize; + + JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, + dspBool(compiler->compLocallocUsed)); + } + // calleeSaveSPDelta = 0; + } + else + { + if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { + JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + frameType = 3; + + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; + + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; + + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + else + { + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + // if (outSzAligned > 0) + { + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
+ + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + else + { + int offset2 = compiler->lvaOutgoingArgSpaceSize; + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); + } + + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); + } + } + else + { + frameType = 4; + + JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + if (compiler->compLocallocUsed) + { + calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; + + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); + compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); + } + else + { + calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + } + } + } + else + { + // No frame pointer (no chaining). + NYI("Frame without frame pointer"); + calleeSaveSPOffset = 0; + } + + JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta); + genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta); + + if (frameType == 1) + { + calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize; + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8); + compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset); + compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset); + + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if (frameType == 2) + { + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); + compiler->unwindAllocStack(totalFrameSize); + } + else if 
(frameType == 3) + { + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else if (frameType == 4) + { + // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + } + else + { + unreached(); + } +} + //----------------------------------------------------------------------------------- // genProfilingEnterCallback: Generate the profiling function enter callback. // @@ -8838,4 +9279,5 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) return; } } + #endif // TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/emitloongarch64.cpp index 2bb2e27f4b5ce..2d260ffed00ac 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -636,7 +636,7 @@ static const char * const RegNames[] = // clang-format off /*static*/ const BYTE CodeGenInterface::instInfo[] = { - #define INSTS(id, nm, fp, info, fmt, e1) info, + #define INST(id, nm, fp, info, fmt, e1) info, #include "instrs.h" }; // clang-format on @@ -692,7 +692,7 @@ inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) // clang-format off const static code_t insCode[] = { - #define INSTS(id, nm, fp, info, fmt, e1) e1, + #define INST(id, nm, fp, info, fmt, e1) e1, #include "instrs.h" }; // clang-format on @@ -726,31 +726,57 @@ void emitter::emitIns(instruction ins) * * Add an Load/Store instruction(s): base+offset and base-addr-computing if needed. * For referencing a stack-based local variable and a register + * + * Special notes for LoongArch64: + * The parameter `offs` has special info. + * The real value of `offs` is positive. + * If the `offs` is negative, its real value is abs(offs); + * the negative `offs` is special for optimizing the large offset which >2047. 
+ * when offs >2047 we can't encode one instruction to load/store the data, + * if there are several load/store in this case, you have to repeat the similar + * large offs with redundant instructions and maybe eat up the `SC_IG_BUFFER_SIZE`. + * + * Optimize the following: + * lu12i.w x0, 0x0 + * ori x0, x0, 0x9ac + * add.d x0, x0, fp + * fst.s fa0, x0, 0 + * + * For the offs within range [0,0x7ff], using one instruction: + * ori x0, x0, offs + * For the offs within range [0x1000,0xffffffff], using two instructions + * lu12i.w x0, offs-hi-20bits + * ori x0, x0, offs-low-12bits + * + * Store/Load the data: + * fstx.s fa0, x0, fp + * + * If the store/load are repeated, + * addi_d x0,x0,sizeof(type) + * fstx.s fa0, x0, fp + * */ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. + emitAttr size = EA_SIZE(attr); #ifdef DEBUG switch (ins) { case INS_st_b: case INS_st_h: + case INS_st_w: case INS_fst_s: - // case INS_swl: - // case INS_swr: - // case INS_sdl: - // case INS_sdr: + case INS_st_d: case INS_fst_d: break; default: - NYI("emitIns_S_R"); // FP locals? + NYI("emitIns_S_R"); return; } // end switch (ins) @@ -806,12 +832,14 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va appendToCurIG(id); } +/* + * Special notes for `offs`, please see the comment for `emitter::emitIns_S_R`. + */ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - // assert(offs >= 0); ssize_t imm; - emitAttr size = EA_SIZE(attr); // it's better confirm attr with ins. 
+ emitAttr size = EA_SIZE(attr); #ifdef DEBUG switch (ins) @@ -829,12 +857,6 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_ld_d: case INS_fld_d: - // case INS_lwl: - // case INS_lwr: - - // case INS_ldl: - // case INS_ldr: - // assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); break; case INS_lea: @@ -842,7 +864,7 @@ break; default: - NYI("emitIns_R_S"); // FP locals? + NYI("emitIns_R_S"); return; } // end switch (ins) @@ -896,13 +918,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va code = emitInsCode(ins); D_INST_2RI12(code, reg1 /* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); } - // reg2 = REG_RA; } instrDesc* id = emitNewInstr(attr); id->idReg1(reg1); - // id->idReg2(reg2);//not used. id->idIns(ins); diff --git a/src/coreclr/jit/gentree.cpp index 658cb84f4cf3d..f9eb57fe549ad 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3160,7 +3160,7 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostEx += idx->GetCostEx(); *pCostSz += idx->GetCostSz(); } - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. if (cns != 0) { if (cns >= (4096 * genTypeSize(type))) @@ -3587,7 +3587,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case GT_CNS_STR: case GT_CNS_LNG: case GT_CNS_INT: - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. costEx = 4; costSz = 4; goto COMMON_CNS; @@ -3653,7 +3653,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 4; } #elif defined(TARGET_LOONGARCH64) - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. 
costEx = 2; costSz = 8; #else @@ -3830,7 +3830,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 6; } #elif defined(TARGET_LOONGARCH64) - // TODO: workround, should amend for LoongArch64. + // TODO-LOONGARCH64: workaround, should amend for LoongArch64. costEx = 1; costSz = 2; if (isflt || varTypeIsFloating(op1->TypeGet())) @@ -6025,6 +6025,10 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) case TYP_INT: #ifdef TARGET_LOONGARCH64 case TYP_UINT: + // For LoongArch64, the register $r0 is always const-zero with 64bits-width. + // Besides the instructions' operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // So for UINT type, LoongArch64 can't share with INT like AMD64 and ARM64. #endif zero = gtNewIconNode(0); break; @@ -13657,7 +13661,12 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: #ifdef TARGET_LOONGARCH64 - assert(tree->TypeIs(TYP_INT) || tree->TypeIs(TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be sign-extended by default. + // e.g. 'ld_w $r4, $r5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be sign-extended by default. 
+ assert(tree->TypeIs(TYP_INT, TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); #else assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); #endif @@ -21873,30 +21882,30 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, #elif defined(TARGET_LOONGARCH64) assert((structSize >= TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE))); - uint32_t numFloatFields = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); - BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + uint32_t floatFieldFlags = comp->info.compCompHnd->getLoongArch64PassStructInRegisterFlags(retClsHnd); + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]); - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
comp->getJitGCType(gcPtrs[1]) : TYP_INT; comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { - assert((structSize > 8) == ((numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; - m_regType[1] = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; comp->compFloatingPointUsed = true; + assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); + m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else { @@ -22100,13 +22109,13 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const var_types regType = GetReturnRegType(idx); if (idx == 0) { - resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // V0 or F0 + resultReg = varTypeIsIntegralOrI(regType) ? REG_INTRET : REG_FLOATRET; // A0 or F0 } else { noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs if (varTypeIsIntegralOrI(regType)) - resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // V0 or V1 + resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // A0 or A1 else // if (!varTypeIsIntegralOrI(regType)) resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? 
REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index d6e424baf0639..394d219a99dd7 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8444,7 +8444,7 @@ bool Compiler::impTailCallRetTypeCompatible(bool allowWideni { return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize); } -#endif // TARGET_AMD64 || TARGET_ARM64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 return false; } @@ -10296,7 +10296,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, return impAssignMultiRegTypeToVar(op, retClsHnd DEBUGARG(unmgdCallConv)); } -#endif // FEATURE_MULTIREG_RET && TARGET_ARM64 +#endif // FEATURE_MULTIREG_RET && (TARGET_ARM64 || TARGET_LOONGARCH64) if (!op->IsCall() || !op->AsCall()->TreatAsHasRetBufArg(this)) { @@ -11313,6 +11313,11 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr #ifdef TARGET_LOONGARCH64 if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) { + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. op1->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; @@ -11329,6 +11334,11 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr #ifdef TARGET_LOONGARCH64 if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) { + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 
'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. op2->AsIntCon()->gtIconVal = fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; op2->gtType = TYP_LONG; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 8f27dd6c231d9..83099e7018786 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -67,7 +67,7 @@ const char* CodeGen::genInsName(instruction ins) #include "instrs.h" #elif defined(TARGET_LOONGARCH64) - #define INSTS(id, nm, fp, ldst, fmt, e1) nm, + #define INST(id, nm, fp, ldst, fmt, e1) nm, #include "instrs.h" #else @@ -542,9 +542,7 @@ void CodeGen::inst_RV_RV_RV(instruction ins, { #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags); -#elif defined(TARGET_LOONGARCH64) - GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); -#elif defined(TARGET_XARCH) +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) GetEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3); #else NYI("inst_RV_RV_RV"); @@ -902,11 +900,12 @@ void CodeGen::inst_RV_TT(instruction ins, } #else // !TARGET_ARM #ifdef TARGET_LOONGARCH64 + // For LoongArch64-ABI, the float arg might be passed by integer register, + // when there is no float register left but there is integer register(s) left. if (emitter::isFloatReg(reg)) assert((ins == INS_fld_d) || (ins == INS_fld_s)); else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) - { // TODO should amend for LOONGARCH64 !!! - // assert((ins==INS_ld_d) || (ins==INS_ld_w)); + { ins = size == EA_4BYTE ? 
INS_ld_w : INS_ld_d; } #endif @@ -1493,9 +1492,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) */ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) { -#ifdef TARGET_LOONGARCH64 - assert(!"unimplemented yet on LoongArch64 for unused."); -#endif + NYI_LOONGARCH64("ins_Move_Extend"); instruction ins = INS_invalid; @@ -1678,8 +1675,6 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } #elif defined(TARGET_ARM64) return INS_ldr; -//#elif defined(TARGET_LOONGARCH64) -// //TODO: add SIMD for LoongArch64. #else assert(!"ins_Load with SIMD type"); #endif @@ -1780,7 +1775,6 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* } else { - // assert((TYP_LONG == srcType) || (TYP_ULONG == srcType)); ins = INS_ld_d; // default ld_d. } #else @@ -2008,7 +2002,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = aligned ? INS_stx_h : INS_st_h; else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) ins = aligned ? INS_stx_w : INS_st_w; - else // if ((TYP_LONG == dstType) || (TYP_ULONG == dstType) || (TYP_REF == dstType)) + else ins = aligned ? INS_stx_d : INS_st_d; #else NYI("ins_Store"); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 2021f0251278a..87b004eaf7be1 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -52,12 +52,9 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals #elif defined(TARGET_LOONGARCH64) - #define INSTS(id, nm, fp, ldst, fmt, e1) INS_##id, + #define INST(id, nm, fp, ldst, fmt, e1) INS_##id, #include "instrs.h" - //INS_dneg, // Not a real instruction. It will be translated to dsubu. - //INS_neg, // Not a real instruction. It will be translated to subu. - //INS_not, // Not a real instruction. It will be translated to nor. INS_lea, // Not a real instruction. 
It is used for load the address of stack locals #else #error Unsupported target architecture @@ -152,7 +149,7 @@ enum insFlags : uint32_t INS_FLAGS_DONT_CARE = 0x00, }; -#elif defined(TARGET_ARM) || defined(TARGET_ARM64) +#elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // TODO-Cleanup: Move 'insFlags' under TARGET_ARM enum insFlags: unsigned { @@ -160,13 +157,6 @@ enum insFlags: unsigned INS_FLAGS_SET = 0x01, INS_FLAGS_DONT_CARE = 0x02, }; -#elif defined(TARGET_LOONGARCH64) -enum insFlags: unsigned -{ - INS_FLAGS_NOT_SET = 0x00, - INS_FLAGS_SET = 0x01, - INS_FLAGS_DONT_CARE = 0x02, -}; #else #error Unsupported target architecture #endif @@ -298,6 +288,7 @@ enum insBarrier : unsigned INS_BARRIER_OSHLD = 1, INS_BARRIER_OSHST = 2, INS_BARRIER_OSH = 3, + INS_BARRIER_NSHLD = 5, INS_BARRIER_NSHST = 6, INS_BARRIER_NSH = 7, @@ -321,11 +312,8 @@ enum insOpts : unsigned INS_OPTS_J, // see ::emitIns_J(). INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). INS_OPTS_I, // see ::emitIns_I_la(). - //INS_OPTS_J2, // see ::emitIns_J(). INS_OPTS_C, // see ::emitIns_Call(). INS_OPTS_RELOC, // see ::emitIns_R_AI(). - //INS_OPTS_, // see ::(). - //INS_OPTS_, // see ::(). }; enum insBarrier : unsigned diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index 3e54bce650d88..99cf4304a6200 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -21,8 +21,8 @@ #error Unexpected target type #endif -#ifndef INSTS -#error INSTS must be defined before including this file. +#ifndef INST +#error INST must be defined before including this file. #endif /*****************************************************************************/ @@ -35,30 +35,30 @@ // emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. 
// clang-format off -INSTS(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) +INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) -INSTS(nop , "nop", 0, 0, IF_LA, 0x03400000) +INST(nop , "nop", 0, 0, IF_LA, 0x03400000) ////INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. -INSTS(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) -INSTS(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) +INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) +INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) -INSTS(beq, "beq", 0, 0, IF_LA, 0x58000000) -INSTS(bne, "bne", 0, 0, IF_LA, 0x5c000000) +INST(beq, "beq", 0, 0, IF_LA, 0x58000000) +INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) -INSTS(blt, "blt", 0, 0, IF_LA, 0x60000000) -INSTS(bge, "bge", 0, 0, IF_LA, 0x64000000) -INSTS(bltu, "bltu", 0, 0, IF_LA, 0x68000000) -INSTS(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) +INST(blt, "blt", 0, 0, IF_LA, 0x60000000) +INST(bge, "bge", 0, 0, IF_LA, 0x64000000) +INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) +INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) ////R_I. -INSTS(beqz, "beqz", 0, 0, IF_LA, 0x40000000) -INSTS(bnez, "bnez", 0, 0, IF_LA, 0x44000000) +INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) +INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) ////I. -INSTS(b, "b", 0, 0, IF_LA, 0x50000000) -INSTS(bl, "bl", 0, 0, IF_LA, 0x54000000) +INST(b, "b", 0, 0, IF_LA, 0x50000000) +INST(bl, "bl", 0, 0, IF_LA, 0x54000000) //////////////////////////////////////////////// ////NOTE: Begin @@ -67,217 +67,217 @@ INSTS(bl, "bl", 0, 0, IF_LA, 0x54000000) // enum name FP LD/ST FMT ENCODE ////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg(). -INSTS(mov, "mov", 0, 0, IF_LA, 0x03800000) +INST(mov, "mov", 0, 0, IF_LA, 0x03800000) // mov rd,rj //NOTE: On loongarch, usually it's name is move, but here for compatible using mov. // In fact, mov is an alias commond, "ori rd,rj,0" -INSTS(dneg, "dneg", 0, 0, IF_LA, 0x00118000) +INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) //dneg is a alias instruction. 
//sub_d rd, zero, rk -INSTS(neg, "neg", 0, 0, IF_LA, 0x00110000) +INST(neg, "neg", 0, 0, IF_LA, 0x00110000) //neg is a alias instruction. //sub_w rd, zero, rk -INSTS(not, "not", 0, 0, IF_LA, 0x00140000) +INST(not, "not", 0, 0, IF_LA, 0x00140000) //not is a alias instruction. //nor rd, rj, zero // enum:id name FP LD/ST Formate ENCODE ////R_R_R. -INSTS(add_w, "add.w", 0, 0, IF_LA, 0x00100000) -INSTS(add_d, "add.d", 0, 0, IF_LA, 0x00108000) -INSTS(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) -INSTS(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) - -INSTS(and, "and", 0, 0, IF_LA, 0x00148000) -INSTS(or, "or", 0, 0, IF_LA, 0x00150000) -INSTS(nor, "nor", 0, 0, IF_LA, 0x00140000) -INSTS(xor, "xor", 0, 0, IF_LA, 0x00158000) -INSTS(andn, "andn", 0, 0, IF_LA, 0x00168000) -INSTS(orn, "orn", 0, 0, IF_LA, 0x00160000) - -INSTS(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) -INSTS(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) -INSTS(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) -INSTS(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) -INSTS(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) -INSTS(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) -INSTS(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) -INSTS(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) -INSTS(div_w, "div.w", 0, 0, IF_LA, 0x00200000) -INSTS(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) -INSTS(div_d, "div.d", 0, 0, IF_LA, 0x00220000) -INSTS(div_du, "div.du", 0, 0, IF_LA, 0x00230000) -INSTS(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) -INSTS(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) -INSTS(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) -INSTS(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) - -INSTS(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) -INSTS(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) -INSTS(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) -INSTS(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) -INSTS(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) -INSTS(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) -INSTS(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) -INSTS(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) - 
-INSTS(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) -INSTS(masknez, "masknez", 0, 0, IF_LA, 0x00138000) - -INSTS(slt, "slt", 0, 0, IF_LA, 0x00120000) -INSTS(sltu, "sltu", 0, 0, IF_LA, 0x00128000) - -INSTS(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) -INSTS(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) -INSTS(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) -INSTS(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) -INSTS(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) -INSTS(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) -INSTS(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) -INSTS(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) -INSTS(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) -INSTS(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) -INSTS(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) -INSTS(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) -INSTS(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) -INSTS(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) -INSTS(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) -INSTS(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) -INSTS(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) -INSTS(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) -INSTS(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) -INSTS(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) -INSTS(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) -INSTS(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) -INSTS(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) -INSTS(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) -INSTS(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) -INSTS(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) -INSTS(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) -INSTS(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) -INSTS(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) -INSTS(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) -INSTS(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) -INSTS(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) -INSTS(ammin_wu, "ammin.wu", 0, 
0, IF_LA, 0x38680000) -INSTS(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) -INSTS(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) -INSTS(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) - -INSTS(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) -INSTS(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) -INSTS(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) -INSTS(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) -INSTS(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) -INSTS(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) -INSTS(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) -INSTS(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) +INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) +INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) +INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) +INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) + +INST(and, "and", 0, 0, IF_LA, 0x00148000) +INST(or, "or", 0, 0, IF_LA, 0x00150000) +INST(nor, "nor", 0, 0, IF_LA, 0x00140000) +INST(xor, "xor", 0, 0, IF_LA, 0x00158000) +INST(andn, "andn", 0, 0, IF_LA, 0x00168000) +INST(orn, "orn", 0, 0, IF_LA, 0x00160000) + +INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) +INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) +INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) +INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) +INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) +INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) +INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) +INST(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) +INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) + +INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) +INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) +INST(sra_w, 
"sra.w", 0, 0, IF_LA, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) +INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) +INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) +INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) +INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) + +INST(slt, "slt", 0, 0, IF_LA, 0x00120000) +INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) +INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) +INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) +INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) +INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) +INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) +INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) +INST(ammax_wu, 
"ammax.wu", 0, 0, IF_LA, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) +INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) ////R_R_R_I. -INSTS(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) -INSTS(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) -INSTS(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) +INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) -INSTS(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) -INSTS(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) +INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) -INSTS(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) +INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) ////R_I. 
-INSTS(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) -INSTS(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) +INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) -INSTS(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) -INSTS(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) -INSTS(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) -INSTS(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) +INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) ////R_R. -INSTS(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) -INSTS(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) -INSTS(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) -INSTS(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) -INSTS(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) -INSTS(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) -INSTS(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) -INSTS(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) -INSTS(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) -INSTS(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) -INSTS(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) -INSTS(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) -INSTS(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) -INSTS(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) -INSTS(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) -INSTS(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) -INSTS(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) -INSTS(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) -INSTS(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) -INSTS(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) -INSTS(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) -INSTS(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) -INSTS(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) -INSTS(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) +INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) +INST(clo_w, 
"clo.w", 0, 0, IF_LA, 0x00001000) +INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) +INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) +INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) +INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) +INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) +INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) +INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) ////R_R_I_I. -INSTS(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) -INSTS(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) -INSTS(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) -INSTS(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) +INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) ////Load. 
-INSTS(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) -INSTS(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) -INSTS(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) -INSTS(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) -INSTS(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) -INSTS(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) -INSTS(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) - -INSTS(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) -INSTS(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) -INSTS(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) -INSTS(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) - -INSTS(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) -INSTS(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) -INSTS(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) -INSTS(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) -INSTS(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) -INSTS(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) -INSTS(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) - -INSTS(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) -INSTS(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) -INSTS(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) -INSTS(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) -INSTS(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) -INSTS(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) -INSTS(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) -INSTS(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) +INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) +INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) +INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) +INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) +INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) +INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) +INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) + +INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) +INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) +INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) +INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) + +INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) +INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) +INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) +INST(ldx_d, "ldx.d", 0, LD, IF_LA, 
0x380c0000) +INST(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) +INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) +INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) ////R_R_I. -INSTS(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) -INSTS(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) -INSTS(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) -INSTS(slti, "slti", 0, 0, IF_LA, 0x02000000) +INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) +INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) +INST(slti, "slti", 0, 0, IF_LA, 0x02000000) -INSTS(sltui, "sltui", 0, 0, IF_LA, 0x02400000) -INSTS(andi, "andi", 0, 0, IF_LA, 0x03400000) -INSTS(ori, "ori", 0, 0, IF_LA, 0x03800000) -INSTS(xori, "xori", 0, 0, IF_LA, 0x03c00000) +INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) +INST(andi, "andi", 0, 0, IF_LA, 0x03400000) +INST(ori, "ori", 0, 0, IF_LA, 0x03800000) +INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) -INSTS(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) -INSTS(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) -INSTS(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) -INSTS(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) -INSTS(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) -INSTS(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) -INSTS(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) -INSTS(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) +INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) +INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) +INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) +INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) +INST(srli_d, "srli.d", 
0, 0, IF_LA, 0x00450000) +INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) -INSTS(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) +INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) -INSTS(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) +INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) ////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// @@ -285,211 +285,211 @@ INSTS(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) //// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// ////Store. -INSTS(st_b, "st.b", 0, ST, IF_LA, 0x29000000) -INSTS(st_h, "st.h", 0, ST, IF_LA, 0x29400000) -INSTS(st_w, "st.w", 0, ST, IF_LA, 0x29800000) -INSTS(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) - -INSTS(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) -INSTS(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) -INSTS(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) -INSTS(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) - -INSTS(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) -INSTS(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) -INSTS(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) -INSTS(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) -INSTS(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) -INSTS(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) -INSTS(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) -INSTS(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) -INSTS(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) -INSTS(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) -INSTS(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) -INSTS(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) - -INSTS(dbar, "dbar", 0, 0, IF_LA, 0x38720000) -INSTS(ibar, "ibar", 0, 0, IF_LA, 0x38728000) - -INSTS(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) -INSTS(break, "break", 0, 0, IF_LA, 0x002a0005) - -INSTS(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) -INSTS(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) - -INSTS(preld, "preld", 0, LD, 
IF_LA, 0x2ac00000) -INSTS(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) +INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) +INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) +INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) +INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) + +INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) +INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) +INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) +INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) + +INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) +INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) +INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) +INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) +INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) +INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) +INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) +INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) + +INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) +INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) + +INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) +INST(break, "break", 0, 0, IF_LA, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) + +INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) +INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) ////Float instructions. ////R_R_R. 
-INSTS(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) -INSTS(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) -INSTS(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) -INSTS(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) -INSTS(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) -INSTS(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) -INSTS(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) -INSTS(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) - -INSTS(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) -INSTS(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) -INSTS(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) -INSTS(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) -INSTS(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) -INSTS(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) -INSTS(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) -INSTS(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) - -INSTS(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) -INSTS(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) - -INSTS(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) -INSTS(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) - -INSTS(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) -INSTS(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) -INSTS(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) -INSTS(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) - -INSTS(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) -INSTS(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) -INSTS(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) -INSTS(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) -INSTS(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) -INSTS(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) -INSTS(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) -INSTS(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) +INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0, 
IF_LA, 0x01068000) +INST(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) +INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) + +INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) +INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) +INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) +INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) ////R_R_R_R. 
-INSTS(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) -INSTS(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) -INSTS(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) -INSTS(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) -INSTS(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) -INSTS(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) -INSTS(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) -INSTS(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) +INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) ////R_R. -INSTS(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) -INSTS(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) -INSTS(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) -INSTS(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) - -INSTS(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) -INSTS(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) -INSTS(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) -INSTS(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) -INSTS(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) -INSTS(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) -INSTS(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) -INSTS(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) -INSTS(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) -INSTS(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) - -INSTS(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) -INSTS(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) -INSTS(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) -INSTS(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) -INSTS(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) -INSTS(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) -INSTS(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) -INSTS(ftint_w_d, 
"ftint.w.d", 0, 0, IF_LA, 0x011b0800) -INSTS(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) -INSTS(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) -INSTS(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) -INSTS(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) -INSTS(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) -INSTS(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) -INSTS(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) -INSTS(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) -INSTS(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) -INSTS(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) -INSTS(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) -INSTS(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) -INSTS(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) -INSTS(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) -INSTS(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) -INSTS(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) -INSTS(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) -INSTS(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) -INSTS(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) -INSTS(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) - -INSTS(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) -INSTS(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) - -INSTS(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) -INSTS(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) -INSTS(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) -INSTS(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) -INSTS(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) -INSTS(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) +INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) +INST(frsqrt_s, 
"frsqrt.s", 0, 0, IF_LA, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) +INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) +INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) +INST(frint_d, "frint.d", 0, 0, IF_LA, 
0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) +INST(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) //// -INSTS(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) -INSTS(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) -INSTS(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) -INSTS(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) -INSTS(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) -INSTS(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) +INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) ////R_R_I. 
-INSTS(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) -INSTS(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) -INSTS(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) -INSTS(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) -INSTS(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) -INSTS(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) -INSTS(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) -INSTS(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) -INSTS(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) -INSTS(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) -INSTS(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) - -INSTS(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) -INSTS(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) -INSTS(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) -INSTS(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) -INSTS(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) -INSTS(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) -INSTS(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) -INSTS(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) -INSTS(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) -INSTS(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) -INSTS(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) - -INSTS(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) -INSTS(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) -INSTS(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) -INSTS(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) -INSTS(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) -INSTS(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) -INSTS(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) -INSTS(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) -INSTS(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) -INSTS(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) -INSTS(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) - -INSTS(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 
0x0c108000) -INSTS(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) -INSTS(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) -INSTS(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) -INSTS(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) -INSTS(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) -INSTS(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) -INSTS(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) -INSTS(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) -INSTS(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) -INSTS(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) +INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) 
+INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) +INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) +INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) ////R_R_I. 
-INSTS(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) -INSTS(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) -INSTS(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) -INSTS(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) +INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) +INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) +INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) +INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) // clang-format on /*****************************************************************************/ -#undef INSTS +#undef INST /*****************************************************************************/ diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index f6254ecdb79b3..279ac5292ec0d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1003,19 +1003,16 @@ class LinearScan : public LinearScanInterface void buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // For AMD64 on SystemV machines. This method // is called as replacement for raUpdateRegStateForArg // that is used on Windows. On System V systems a struct can be passed // partially using registers from the 2 register files. - void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif // defined(UNIX_AMD64_ABI) - -#if defined(TARGET_LOONGARCH64) + // // For LoongArch64's ABI, a struct can be passed // partially using registers from the 2 register files. 
- void LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc); -#endif + void UpdateRegStateForStructArg(LclVarDsc* argDsc); +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Update reg state for an incoming register argument void updateRegStateForArg(LclVarDsc* argDsc); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 90473a99ea003..4df1f14b7a689 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1962,9 +1962,11 @@ void LinearScan::insertZeroInitRefPositions() } } -#if defined(UNIX_AMD64_ABI) +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems. +// UpdateRegStateForStructArg: +// Sets the register state for an argument of type STRUCT. +// This is shared between AMD64's SystemV systems and the LoongArch64 ABI. // // Arguments: // argDsc - the LclVarDsc for the argument of interest @@ -1973,7 +1975,7 @@ void LinearScan::insertZeroInitRefPositions() // See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp // for how state for argument is updated for unix non-structs and Windows AMD64 structs.
// -void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) +void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) { assert(varTypeIsStruct(argDsc)); RegState* intRegState = &compiler->codeGen->intRegState; @@ -2008,44 +2010,7 @@ void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc) } } -#endif // defined(UNIX_AMD64_ABI) - -#ifdef TARGET_LOONGARCH64 -void LinearScan::LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc) -{ - assert(varTypeIsStruct(argDsc)); - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - - if ((argDsc->GetArgReg() != REG_STK) && (argDsc->GetArgReg() != REG_NA)) - { - if (genRegMask(argDsc->GetArgReg()) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->GetArgReg()) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); - } - else - { - assert(genRegMask(argDsc->GetArgReg()) & (RBM_ARG_REGS)); - intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetArgReg()); - } - } - - if ((argDsc->GetOtherArgReg() != REG_STK) && (argDsc->GetOtherArgReg() != REG_NA)) - { - if (genRegMask(argDsc->GetOtherArgReg()) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); - } - else - { - assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_ARG_REGS)); - intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->GetOtherArgReg()); - } - } -} -#endif +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate @@ -2068,22 +2033,15 @@ void LinearScan::LoongArch64UpdateRegStateForArg(LclVarDsc* argDsc) // void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { -#if defined(UNIX_AMD64_ABI) - // For System V AMD64 calls the argDsc can have 2 
registers (for structs.) - // Handle them here. +#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) + // For SystemV-AMD64 and LoongArch64 calls the argDsc + // can have 2 registers (for structs.). Handle them here. if (varTypeIsStruct(argDsc)) { - unixAmd64UpdateRegStateForArg(argDsc); + UpdateRegStateForStructArg(argDsc); } else -#endif // defined(UNIX_AMD64_ABI) -#if defined(TARGET_LOONGARCH64) - if (varTypeIsStruct(argDsc)) - { - LoongArch64UpdateRegStateForArg(argDsc); - } - else -#endif +#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b545254743700..164bb0f0a6643 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5515,6 +5515,11 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) #ifdef TARGET_LOONGARCH64 else if ((op1->TypeGet() == TYP_LONG) && (ad2->TypeGet() == TYP_INT)) { + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. new_op1->gtType = TYP_LONG; } #endif @@ -5812,7 +5817,14 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) noway_assert(index2 != nullptr); } -#ifndef TARGET_LOONGARCH64 +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. 
+ GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); +#else // Next introduce a GT_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. @@ -5832,8 +5844,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) { arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } -#else - GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); #endif GenTreeBoundsChk* arrBndsChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL); @@ -5854,6 +5864,11 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) #ifdef TARGET_64BIT #ifndef TARGET_LOONGARCH64 // Widen 'index' on 64-bit targets + // But For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. if (index->TypeGet() != TYP_I_IMPL) { if (index->OperGet() == GT_CNS_INT) @@ -14532,7 +14547,12 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) { #ifdef TARGET_LOONGARCH64 - if ((op2->TypeGet() == TYP_LONG) /*&& (op1->TypeGet() == TYP_INT)*/) + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. + if (op2->TypeGet() == TYP_LONG) { op1->gtType = TYP_LONG; } @@ -18255,6 +18275,11 @@ void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZ { // We expect 'addr' to be an address at this point. 
#ifdef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || addr->TypeGet() == TYP_REF); #else diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 4038a80476c78..1a3ad619b3da4 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5124,6 +5124,11 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu noway_assert(tree); #ifndef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. So `LONG != INT(but default is LONG)` noway_assert(genActualType(tree->gtType) == genActualType(srct)); #endif @@ -5294,6 +5299,11 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu { case GT_AND: #ifdef TARGET_LOONGARCH64 + // For LoongArch64's instructions operation of the 64bits and 32bits using the whole + // 64bits-width register which is unlike the AMD64 and ARM64. + // And the INT type instruction will be signed-extend by default. + // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT + // will be signed-extend by default. 
So `LONG != INT(but default is LONG)` noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op1->gtType))); noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op2->gtType))); #else From 967402ec686cb3afa96275950cf8e81409d98211 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Feb 2022 19:17:07 +0800 Subject: [PATCH 23/46] [LoongArch64] round 3 amend for reviewing by @BruceForstall. --- src/coreclr/jit/codegencommon.cpp | 53 ++++++++------------------ src/coreclr/jit/codegenloongarch64.cpp | 35 +++-------------- src/coreclr/jit/gentree.cpp | 14 ++++--- src/coreclr/jit/instr.h | 5 ++- src/coreclr/jit/morph.cpp | 4 +- src/coreclr/jit/registerloongarch64.h | 3 ++ src/coreclr/jit/targetloongarch64.h | 5 ++- 7 files changed, 42 insertions(+), 77 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 38c98960fcf92..3a67607bc657a 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1863,10 +1863,9 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi } else { -// The code to throw the exception will be generated inline, and -// we will jump around it in the normal non-exception case. + // The code to throw the exception will be generated inline, and + // we will jump around it in the normal non-exception case. -#ifndef TARGET_LOONGARCH64 BasicBlock* tgtBlk = nullptr; emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind); if (reverseJumpKind != jumpKind) @@ -1874,18 +1873,15 @@ void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKi tgtBlk = genCreateTempLabel(); inst_JMP(reverseJumpKind, tgtBlk); } -#endif genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN); -#ifndef TARGET_LOONGARCH64 // Define the spot for the normal non-exception case to jump to. 
if (tgtBlk != nullptr) { assert(reverseJumpKind != jumpKind); genDefineTempLabel(tgtBlk); } -#endif } } @@ -3393,15 +3389,9 @@ void CodeGen::genFnPrologCalleeRegArgs() tmp_offset = base; tmp_reg = REG_R21; - if ((0 < base) && (base <= 0xfff)) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); - } - else - { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } @@ -3445,15 +3435,9 @@ void CodeGen::genFnPrologCalleeRegArgs() { tmp_offset = base; tmp_reg = REG_R21; - if ((0 < base) && (base <= 0xfff)) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); - } - else - { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. 
GetEmitter()->emitIns_S_R(ins_Store(storeType, true), size, srcRegNum, varNum, -8); } else @@ -3471,8 +3455,7 @@ void CodeGen::genFnPrologCalleeRegArgs() baseOffset = 8; base += 8; - GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, - genTotalFrameSize()); + GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); if ((-2048 <= base) && (base < 2048)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); @@ -3483,15 +3466,9 @@ void CodeGen::genFnPrologCalleeRegArgs() { tmp_offset = base; tmp_reg = REG_R21; - if ((0 < base) && (base <= 0xfff)) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R0, tmp_offset); - } - else - { - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); - } + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); + // NOTE: `REG_R21` will be used within `emitIns_S_R`. + // Details see the comment for `emitIns_S_R`. GetEmitter()->emitIns_S_R(INS_stx_d, size, REG_ARG_LAST, varNum, -8); } else @@ -4912,7 +4889,7 @@ void CodeGen::genEnregisterIncomingStackArgs() #ifdef TARGET_LOONGARCH64 { bool FPbased; - int base = compiler->lvaFrameAddress(varNum, &FPbased); + int base = compiler->lvaFrameAddress(varNum, &FPbased); if ((-2048 <= base) && (base < 2048)) { @@ -4925,8 +4902,8 @@ void CodeGen::genEnregisterIncomingStackArgs() regNumber reg2 = FPbased ? 
REG_FPBASE : REG_SPBASE; tmp_offset = base; tmp_reg = REG_R21; - GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, tmp_offset >> 12); - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, tmp_offset & 0xfff); + + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, base); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_R21, REG_R21, reg2); GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, -8); } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 69e3886056dba..d778aca393385 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1511,7 +1511,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) switch (addrInfo.accessType) { case IAT_VALUE: - //TODO-LOONGARCH64-CQ: using B/BL for optimization. + // TODO-LOONGARCH64-CQ: using B/BL for optimization. case IAT_PVALUE: // Load the address into a register, load indirect and call through a register // We have to use REG_INDIRECT_CALL_TARGET_REG since we assume the argument registers are in use @@ -2033,20 +2033,8 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { // This is only possible for a zero-init or bitcast. const bool zeroInit = data->IsIntegralConst(0); -#if 0 - //TODO: supporting the SIMD on LoongArch64. - if (zeroInit && varTypeIsSIMD(targetType)) - { - assert(!varTypeIsSIMD(targetType)); - //assert(targetType == TYP_SIMD8);//TODO:TYP_SIMD16 - assert(targetReg == REG_NA); - GetEmitter()->emitIns_S_R(INS_st_d, EA_8BYTE, REG_R0, varNum, 0); - genUpdateLife(lclNode); - return; - } -#else + // TODO-LOONGAARCH64-CQ: not supporting SIMD. 
assert(!varTypeIsSIMD(targetType)); -#endif if (zeroInit) { @@ -2581,13 +2569,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) { ssize_t intConst = (int)(divisorOp->AsIntCon()->gtIconVal); divisorReg = REG_R21; - if ((-2048 <= intConst) && (intConst <= 0x7ff)) - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); - else - { - emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12); - emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff); - } + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); } // Only for commutative operations do we check src1 and allow it to be a contained immediate else if (tree->OperIsCommutative()) @@ -2601,13 +2583,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) assert(!divisorOp->isContainedIntOrIImmed()); ssize_t intConst = (int)(src1->AsIntCon()->gtIconVal); Reg1 = REG_R21; - if ((-2048 <= intConst) && (intConst <= 0x7ff)) - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_R21, REG_R0, (short)intConst); - else - { - emit->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, intConst >> 12); - emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, intConst & 0xfff); - } + emit->emitIns_I_la(EA_PTRSIZE, REG_R21, intConst); } } else @@ -5842,7 +5818,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) (ssize_t)compiler->gsGlobalSecurityCookieAddr); } else - { ////TODO:LoongArch64 should amend for optimize! + { + //TODO-LOONGARCH64: should amend for optimize! 
// GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, // (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f9eb57fe549ad..ac0ab72012a6e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6025,10 +6025,10 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) case TYP_INT: #ifdef TARGET_LOONGARCH64 case TYP_UINT: - // For LoongArch64, the register $r0 is always const-zero with 64bits-width. - // Besides the instructions's operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // So for UINT type, LoongArch64 can't share with INT liking AMD64 and ARM64. +// For LoongArch64, the register $r0 is always const-zero with 64bits-width. +// Besides the instructions's operation of the 64bits and 32bits using the whole +// 64bits-width register which is unlike the AMD64 and ARM64. +// So for UINT type, LoongArch64 can't share with INT liking AMD64 and ARM64. #endif zero = gtNewIconNode(0); break; @@ -21898,13 +21898,15 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, comp->compFloatingPointUsed = true; assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; + m_regType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[1]) : TYP_INT; } else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { comp->compFloatingPointUsed = true; assert((structSize > 8) == ((floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) > 0)); - m_regType[0] = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? 
comp->getJitGCType(gcPtrs[0]) : TYP_INT; + m_regType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? comp->getJitGCType(gcPtrs[0]) : TYP_INT; m_regType[1] = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 87b004eaf7be1..92b1c6f63f065 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -308,7 +308,7 @@ enum insOpts : unsigned INS_OPTS_RC, // see ::emitIns_R_C(). INS_OPTS_RL, // see ::emitIns_R_L(). - INS_OPTS_JIRL, // see ::emitIns_J_R(). + INS_OPTS_JIRL, // see ::emitIns_J_R(). INS_OPTS_J, // see ::emitIns_J(). INS_OPTS_J_cond, // see ::emitIns_J_cond_la(). INS_OPTS_I, // see ::emitIns_I_la(). @@ -318,6 +318,9 @@ enum insBarrier : unsigned { + // TODO-LOONGARCH64-CQ: all of these are the same value right now. + // These are reserved for future extension. + // Because the LoongArch64 doesn't support these right now. INS_BARRIER_FULL = 0, INS_BARRIER_WMB = INS_BARRIER_FULL,//4, INS_BARRIER_MB = INS_BARRIER_FULL,//16, diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 164bb0f0a6643..5dda47838191d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5333,7 +5333,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* GenTree* arg = fgMakeTmpArgNode(argEntry); // Change the expression to "(tmp=val),tmp" - arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); + arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); #endif // FEATURE_FIXED_OUT_ARGS @@ -5823,7 +5823,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) // And the INT type instruction will be signed-extend by default. // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT // will be signed-extend by default.
- GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); + GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); #else // Next introduce a GT_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 0d8beac0e3aac..5519b0639de4c 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -49,6 +49,9 @@ REGDEF(S6, 29, 0x20000000, "s6" ) REGDEF(S7, 30, 0x40000000, "s7" ) REGDEF(S8, 31, 0x80000000, "s8" ) +//NOTE for LoongArch64: +// The `REG_R21` which aliases `REG_X0` is specially reserved !!! +// It can be used only manually and with great care!!! REGALIAS(R21, X0) #define FBASE 32 diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 2bfaea897abef..465e34af66170 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -6,6 +6,10 @@ #error The file should not be included for this platform. #endif +//NOTE for LoongArch64: +// The `REG_R21` which alias `REG_X0` is specially reserved !!! +// It can be used only by manully and should be very careful!!! + // clang-format off #define CPU_LOAD_STORE_ARCH 1 #define CPU_HAS_FP_SUPPORT 1 @@ -39,7 +43,6 @@ #define MAX_MULTIREG_COUNT 2 // Maxiumum number of registers defined by a single instruction (including calls). // This is also the maximum number of registers for a MultiReg node. - #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround.
For both ARM and AMD64, the frame size is fixed, so we don't really From b9bd532d4b0536e05aa47762222251a8dd8eceb6 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Feb 2022 19:46:17 +0800 Subject: [PATCH 24/46] [LoongArch64] amend the format. --- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/compiler.cpp | 8 +-- src/coreclr/jit/morph.cpp | 94 +++++++++++++------------- src/coreclr/jit/targetloongarch64.h | 2 +- 4 files changed, 54 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d778aca393385..d9051b85cbe0f 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -5819,7 +5819,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - //TODO-LOONGARCH64: should amend for optimize! + // TODO-LOONGARCH64: should amend for optimize! // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, // (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9c68c153e33b1..278f6466aca6a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -935,14 +935,14 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #ifdef TARGET_LOONGARCH64 if (structSize <= (TARGET_POINTER_SIZE * 2)) { - uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_ONE) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) { howToReturnStruct = SPK_PrimitiveType; - useType = structSize > 4 ? TYP_DOUBLE : TYP_FLOAT; + useType = (structSize > 4) ? 
TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + else if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { howToReturnStruct = SPK_ByValue; useType = TYP_STRUCT; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 5dda47838191d..6ef1c589ed297 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2946,7 +2946,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #elif defined(TARGET_X86) - passUsingFloatRegs = false; + passUsingFloatRegs = false; #elif defined(TARGET_LOONGARCH64) assert(!callIsVararg); @@ -3070,7 +3070,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) byteSize = genTypeSize(argx); } #elif defined(TARGET_LOONGARCH64) - DWORD numFloatFields = 0; + DWORD floatFieldFlags = 0; if (!isStructArg) { size = 1; @@ -3107,14 +3107,14 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { assert((howToPassStruct == SPK_ByValue) || (howToPassStruct == SPK_PrimitiveType)); - numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - passUsingFloatRegs = (numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; + passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? 
true : false; compFloatingPointUsed |= passUsingFloatRegs; - if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) + if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) size = 1; - else if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) size = 2; } else // if (passStructByRef) @@ -3278,28 +3278,29 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (isStructArg) { - if ((numFloatFields & 0x6) && passUsingFloatRegs) + if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && + passUsingFloatRegs) passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; if (!passUsingFloatRegs) { - size = structSize > 8 ? 2 : 1; - numFloatFields = 0; + size = structSize > 8 ? 2 : 1; + floatFieldFlags = 0; } else if (passUsingFloatRegs) { - if (numFloatFields & 0x8) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); - else if (numFloatFields & 0x4) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { assert(size == 1); size = 2; passUsingFloatRegs = false; nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); } - else if (/*(size == 1) && */ (numFloatFields & 0x2)) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - assert((size == 1) && (numFloatFields & 0x2)); + assert(size == 1); size = 2; nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum); } @@ -3510,25 +3511,24 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // assert(nextOtherRegNum == REG_STK); intArgRegNum = maxRegArgs; } - else if ((numFloatFields & 0xf) == 0x0) + else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) { if (passUsingFloatRegs) fltArgRegNum += 1; else intArgRegNum += size; } - else if (numFloatFields & 0x1) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) { structBaseType = structSize == 8 ? 
TYP_DOUBLE : TYP_FLOAT; fltArgRegNum += 1; } - else if (numFloatFields & 0x6) + else if (floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) { - // assert((numFloatFields & 0x2) || (numFloatFields & 0x4)); fltArgRegNum += 1; intArgRegNum += 1; } - else if (numFloatFields & 0x8) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { fltArgRegNum += 2; } @@ -3617,8 +3617,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { newArgEntry->passedByRef = passStructByRef; #if defined(TARGET_LOONGARCH64) - newArgEntry->argType = - (numFloatFields & 0xe) || (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; + newArgEntry->argType = ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) || + (structBaseType == TYP_UNKNOWN)) + ? argx->TypeGet() + : structBaseType; #else newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; #endif @@ -4973,35 +4975,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); + assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? 
TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_FIRST) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? type[1] : TYP_INT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; + elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 
8 : 4; GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); newArg->AddField(this, nextLclFld, offset, tmp_type_1); @@ -5060,35 +5062,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; #if defined(TARGET_LOONGARCH64) - uint32_t numFloatFields = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (numFloatFields & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); + if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) { - assert((numFloatFields & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); + assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); var_types tmp_type_1; var_types tmp_type_2; compFloatingPointUsed = true; - if (numFloatFields & STRUCT_FLOAT_FIELD_ONLY_TWO) + if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? type[1] : TYP_INT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ?
type[1] : TYP_INT; } - else if (numFloatFields & STRUCT_FLOAT_FIELD_SECOND) + else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { - tmp_type_1 = numFloatFields & STRUCT_FIRST_FIELD_SIZE_IS8 ? type[0] : TYP_INT; - tmp_type_2 = numFloatFields & STRUCT_SECOND_FIELD_SIZE_IS8 ? TYP_DOUBLE : TYP_FLOAT; + tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; + tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } else { assert(!"----------------unimplemented type-case... on LOONGARCH"); unreached(); } - elemSize = numFloatFields & STRUCT_HAS_8BYTES_FIELDS_MASK ? 8 : 4; + elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 465e34af66170..8e74ea0334a9c 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -6,7 +6,7 @@ #error The file should not be included for this platform. #endif -//NOTE for LoongArch64: +// NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved !!! // It can be used only by manully and should be very careful!!! From 6789232f2576c3d335318a7d5299dfaceab6c969 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 19 Feb 2022 14:21:43 +0800 Subject: [PATCH 25/46] [LoongArch64] round 4 amending for reviewing. 
--- src/coreclr/jit/codegen.h | 11 +- src/coreclr/jit/codegencommon.cpp | 30 +- src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 490 ++++++++----------------- src/coreclr/jit/emit.h | 9 +- src/coreclr/jit/emitfmtsloongarch64.h | 14 +- src/coreclr/jit/emitjmps.h | 1 + src/coreclr/jit/emitloongarch64.h | 141 ------- 8 files changed, 186 insertions(+), 512 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 946470d924dff..cd144bf8e8a4e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -946,10 +946,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTree* tree); -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale); void genCodeForMulLong(GenTreeOp* mul); -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH #if !defined(TARGET_64BIT) void genLongToIntCast(GenTree* treeNode); @@ -1267,8 +1267,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForStoreLclFld(GenTreeLclFld* tree); void genCodeForStoreLclVar(GenTreeLclVar* tree); void genCodeForReturnTrap(GenTreeOp* tree); - void genCodeForJcc(GenTreeCC* tree); - void genCodeForSetcc(GenTreeCC* setcc); void genCodeForStoreInd(GenTreeStoreInd* tree); void genCodeForSwap(GenTreeOp* tree); void genCodeForCpObj(GenTreeObj* cpObjNode); @@ -1575,6 +1573,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue); #endif // TARGET_XARCH +#ifndef TARGET_LOONGARCH64 // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions // such as X86's SETcc. 
A sequence of instructions rather than just a single one is required for // certain floating point conditions. @@ -1618,6 +1617,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); + + void genCodeForJcc(GenTreeCC* tree); + void genCodeForSetcc(GenTreeCC* setcc); +#endif // !TARGET_LOONGARCH64 }; // A simple phase that just invokes a method on the codegen instance diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 054fe84564ea6..1c6cd7f1c1770 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2999,20 +2999,20 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // struct regArgElem { - unsigned varNum; // index into compiler->lvaTable[] for this register argument + unsigned varNum; // index into compiler->lvaTable[] for this register argument #if defined(UNIX_AMD64_ABI) - var_types type; // the Jit type of this regArgTab entry -#endif // defined(UNIX_AMD64_ABI) - unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. - // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to - // argument register number 'x'. Only used when circular = true. - char slot; // 0 means the register is not used for a register argument - // 1 means the first part of a register argument - // 2, 3 or 4 means the second,third or fourth part of a multireg argument - bool stackArg; // true if the argument gets homed to the stack - bool writeThru; // true if the argument gets homed to both stack and register - bool processed; // true after we've processed the argument (and it is in its final location) - bool circular; // true if this register participates in a circular dependency loop. 
+ var_types type; // the Jit type of this regArgTab entry +#endif // defined(UNIX_AMD64_ABI) + unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. + // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to + // argument register number 'x'. Only used when circular = true. + char slot; // 0 means the register is not used for a register argument + // 1 means the first part of a register argument + // 2, 3 or 4 means the second,third or fourth part of a multireg argument + bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register + bool processed; // true after we've processed the argument (and it is in its final location) + bool circular; // true if this register participates in a circular dependency loop. #ifdef UNIX_AMD64_ABI @@ -3815,10 +3815,10 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere else #endif // TARGET_XARCH { - var_types destMemType = varDscDest->TypeGet(); + var_types destMemType = varDscDest->TypeGet(); #ifdef TARGET_ARM - bool cycleAllDouble = true; // assume the best + bool cycleAllDouble = true; // assume the best unsigned iter = begReg; do diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index fe38626f81541..bda049dd37abb 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2648,7 +2648,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) inst_JCC(condition, compiler->compCurBB->bbJumpDest); } -#endif // !TARGET_LOONGARCH64 //------------------------------------------------------------------------ // genCodeForJcc: Generate code for a GT_JCC node. 
@@ -2706,3 +2705,4 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) inst_SETCC(setcc->gtCondition, setcc->TypeGet(), setcc->GetRegNum()); genProduceReg(setcc); } +#endif // !TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d59c99c07b2ce..2d856550a5486 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -26,35 +26,23 @@ static short splitLow(int value) return (value & 0xffff); } -// Returns true if 'value' is a legal signed immediate 16 bit encoding. -static bool isValidSimm16(ssize_t value) -{ - return -(((int)1) << 15) <= value && value < (((int)1) << 15); -}; - -// Returns true if 'value' is a legal unsigned immediate 16 bit encoding. -static bool isValidUimm16(ssize_t value) -{ - return (0 == (value >> 16)); -}; - // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { return -(((int)1) << 11) <= value && value < (((int)1) << 11); -}; +} // Returns true if 'value' is a legal unsigned immediate 11 bit encoding. static bool isValidUimm11(ssize_t value) { return (0 == (value >> 11)); -}; +} // Returns true if 'value' is a legal unsigned immediate 12 bit encoding. static bool isValidUimm12(ssize_t value) { return (0 == (value >> 12)); -}; +} /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -73,10 +61,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // // However the imm might not fit as a directly encodable immediate, // when it doesn't fit we generate extra instruction(s) that sets up -// the 'regTmp' with the proper immediate value. +// the 'tmpReg' with the proper immediate value. // -// li64 regTmp, imm -// ins reg1, reg2, regTmp +// li tmpReg, imm // li is pseudo instruction here which maybe 2-4 instructions. 
+// ins reg1, reg2, tmpReg // // Arguments: // ins - instruction @@ -104,7 +92,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, // reg1 is usually a dest register // reg2 is always source register - assert(tmpReg != reg2); // regTmp can not match any source register + assert(tmpReg != reg2); // tmpReg can not match any source register #ifdef DEBUG switch (ins) @@ -131,7 +119,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, break; } #endif - bool immFitsInIns = (-2048 <= imm) && (imm <= 2047); + bool immFitsInIns = isValidSimm12(imm); if (immFitsInIns) { @@ -1651,7 +1639,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); assert(untrLclLo % 4 == 0); - if ((-2048 <= untrLclLo) && (untrLclLo < 2048)) + if (isValidSimm12(untrLclLo)) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); } @@ -1697,7 +1685,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // argument reg instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2); - /* TODO for LA: maybe optimize further */ + // TODO-LOONGARCH64: maybe optimize further GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 8 + padding); GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, rAddr, 0 + padding); GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rCnt, rCnt, -1); @@ -1829,7 +1817,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) -{ // maybe optimize. +{ emitter* emit = GetEmitter(); if (!compiler->opts.compReloc) @@ -1839,12 +1827,12 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, if (EA_IS_RELOC(size)) { - assert(genIsValidIntReg(reg)); // TODO: maybe optimize!!! 
+ assert(genIsValidIntReg(reg)); emit->emitIns_R_AI(INS_bl, size, reg, imm); // for example: EA_PTR_DSP_RELOC } else { - emit->emitIns_I_la(size, reg, imm); // TODO: maybe optimize. + emit->emitIns_I_la(size, reg, imm); } regSet.verifyRegUsed(reg); @@ -1896,10 +1884,6 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // We will just zero out the entire vector register for both float and double emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, REG_R0); } - /*else if (emitter::emitIns_valid_imm_for_fmov(constValue)) - {// LOONGARCH64 doesn't need this. - assert(!"unimplemented on LOONGARCH yet"); - }*/ else { // Get a temp integer register to compute long address. @@ -1928,26 +1912,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Produce code for a GT_INC_SATURATE node. void CodeGen::genCodeForIncSaturate(GenTree* tree) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - regNumber targetReg = tree->GetRegNum(); - - // The arithmetic node must be sitting in a register (since it's not contained) - assert(!tree->isContained()); - // The dst can only be a register. - assert(targetReg != REG_NA); - - GenTree* operand = tree->gtGetOp1(); - assert(!operand->isContained()); - // The src must be a register. 
- regNumber operandReg = genConsumeReg(operand); - - GetEmitter()->emitIns_R_R_I(INS_addi_d, emitActualTypeSize(tree), targetReg, operandReg, 1); - GetEmitter()->emitIns_R_R_I(INS_bne, emitActualTypeSize(tree), targetReg, REG_R0, 2); - GetEmitter()->emitIns_R_R_R(INS_andn, emitActualTypeSize(tree), targetReg, targetReg, REG_R0); - - genProduceReg(tree); -#endif + NYI("unimplemented on LOONGARCH64 yet"); } // Generate code to get the high N bits of a N*N=2N bit multiplication result @@ -2152,7 +2117,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) unsigned int regCount = varDsc->lvFieldCnt; for (unsigned i = 0; i < regCount; ++i) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); regNumber varReg = lclNode->GetRegByIndex(i); assert(varReg != REG_NA); unsigned fieldLclNum = varDsc->lvFieldLclStart + i; @@ -2180,7 +2145,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) { // This is only possible for a zero-init or bitcast. const bool zeroInit = data->IsIntegralConst(0); - // TODO-LOONGAARCH64-CQ: not supporting SIMD. + // TODO-LOONGARCH64-CQ: not supporting SIMD. assert(!varTypeIsSIMD(targetType)); if (zeroInit) @@ -2655,7 +2620,7 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - assert(!"unimpleement on LOONGAARCH64 yet"); + assert(!"unimpleement on LOONGARCH64 yet"); } //------------------------------------------------------------------------ @@ -2834,7 +2799,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) else ins = INS_mod_wu; - // TODO: temp workround, should amend for optimize. + // TODO-LOONGARCH64: here is just for signed-extension ? 
emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, divisorReg, divisorReg, 0); } @@ -3141,7 +3106,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) if (cpObjNode->gtFlags & GTF_BLK_VOLATILE) { // issue a INS_BARRIER_RMB after a volatile CpObj operation - ////TODO: there is only BARRIER_FULL for LOONGARCH64. + // TODO-LOONGARCH64: there is only BARRIER_FULL for LOONGARCH64. instGen_MemoryBarrier(BARRIER_FULL); } @@ -3166,7 +3131,6 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, baseReg, baseReg, 0); // add it to the absolute address of fgFirstBB - // compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;//TODO for LOONGARCH64. GetEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg); GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, baseReg, baseReg, tmpReg); @@ -3219,7 +3183,7 @@ void CodeGen::genJumpTable(GenTree* treeNode) // void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------ @@ -3230,7 +3194,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) // void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } static inline bool isImmed(GenTree* treeNode) @@ -3577,13 +3541,13 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); if (compiler->opts.compReloc) { GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else { + // TODO-LOONGARCH64: maybe optimize further. 
// GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, callTarget, (ssize_t)pAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ); GetEmitter()->emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, callTarget, ((ssize_t)pAddr & 0xfffff000) >> 12); @@ -3591,7 +3555,6 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, callTarget, callTarget, ((ssize_t)pAddr & 0xfff) >> 2); } regSet.verifyRegUsed(callTarget); - // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); } else { @@ -3599,7 +3562,7 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) callTarget = REG_NA; } - ////TODO: can optimize further !!! + // TODO-LOONGARCH64: can optimize further !!! GetEmitter()->emitIns_Call(callType, compiler->eeFindHelper(CORINFO_HELP_STOP_FOR_GC), INDEBUG_LDISASM_COMMA(nullptr) addr, 0, EA_UNKNOWN, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, DebugInfo(), /* IL offset */ @@ -3647,10 +3610,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // as that is where 'addr' must go. noway_assert(data->GetRegNum() != REG_WRITE_BARRIER_DST_BYREF); - // 'addr' goes into x14 (REG_WRITE_BARRIER_DST) + // 'addr' goes into REG_T6 (REG_WRITE_BARRIER_DST) genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); - // 'data' goes into x15 (REG_WRITE_BARRIER_SRC) + // 'data' goes into REG_T7 (REG_WRITE_BARRIER_SRC) genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); genGCWriteBarrier(tree, writeBarrierForm); @@ -3699,7 +3662,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // void CodeGen::genCodeForSwap(GenTreeOp* tree) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------ @@ -3746,7 +3709,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) genConsumeOperands(treeNode->AsOp()); if (IsUnsigned) - { // should amend. 
+ { emit->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, REG_SCRATCH_FLT, op1->GetRegNum()); // save op1 if (srcSize == EA_8BYTE) @@ -4189,7 +4152,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else { - // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; @@ -4410,9 +4372,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) // void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { - // assert(compiler->compCurBB->bbJumpKind == BBJ_COND);//should confirm. - ////assert(jtrue->OperIs(GT_JTRUE)); - emitter* emit = GetEmitter(); GenTreeOp* tree = jtrue->OperIs(GT_JTRUE) ? jtrue->gtGetOp1()->AsOp() : jtrue; @@ -4450,11 +4409,11 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (varTypeIsFloating(op1Type)) { assert(genTypeSize(op1Type) == genTypeSize(op2Type)); - // int cc = 1; assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); bool IsUnordered = (tree->gtFlags & GTF_RELOP_NAN_UN) != 0; + // here default use cc = 1 for float comparing. if (tree->OperIs(GT_EQ)) { ins = INS_bcnez; @@ -4516,7 +4475,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) op2->GetRegNum(), 1 /*cc*/); } - // assert(0 <= cc && cc < 8); if (IsEq) emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, (int)1 /*cc*/); // 5-bits; else @@ -4550,9 +4508,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) imm2 = static_cast(imm2); } break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; + default: assert(!"Unexpected type in jumpTrue."); } @@ -4608,7 +4564,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else { - // TODO:can optimize further. if (op1->isContainedIntOrIImmed()) { op1 = tree->gtOp2; @@ -4657,14 +4612,12 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) case EA_1BYTE: imm = static_cast(imm); break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; + default: assert(!"Unexpected type in jumpTrue(imm)."); } - emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); // TODO: maybe optimize. 
+ emit->emitIns_I_la(EA_PTRSIZE, REG_RA, imm); } else { @@ -4707,7 +4660,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) regNumber regOp2 = op2->GetRegNum(); if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { // TODO: should amend further!!! + { regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); @@ -4716,7 +4669,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { // TODO: should amend further!!! + { regNumber tmpRegOp1 = tree->ExtractTempReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); @@ -4725,12 +4678,12 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } else if (cmpSize == EA_4BYTE && op1->OperIs(GT_CALL) && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) - { // TODO: should amend further!!! + { emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, regOp2, 0); regOp2 = REG_RA; } else if (cmpSize == EA_4BYTE && ((op1->gtFlags | op2->gtFlags) & GTF_UNSIGNED)) - { // TODO: should amend further!!! + { if (!(op1->gtFlags & GTF_UNSIGNED)) { regNumber tmpRegOp1 = tree->ExtractTempReg(); @@ -4833,46 +4786,31 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree) regNumber reg = op1->GetRegNum(); emitAttr attr = emitActualTypeSize(op1->TypeGet()); - // if (tree->gtFlags & GTF_JCMP_TST) - //{ - // assert(!"unimplemented on LOONGARCH yet"); - // //ssize_t compareImm = op2->AsIntCon()->IconValue(); - - // //assert(isPow2(compareImm)); - - // //instruction ins = (tree->gtFlags & GTF_JCMP_EQ) ? 
INS_tbz : INS_tbnz; - // //int imm = genLog2((size_t)compareImm); - - // //GetEmitter()->emitIns_J_R_I(ins, attr, compiler->compCurBB->bbJumpDest, reg, imm); - //} - // else + instruction ins; + int regs; + if (op2->AsIntCon()->gtIconVal) { - instruction ins; - int regs; - if (op2->AsIntCon()->gtIconVal) - { - assert(reg != REG_R21); - ssize_t imm = op2->AsIntCon()->gtIconVal; - if (attr == EA_4BYTE) - { - assert(reg != REG_RA); - imm = (int32_t)imm; - GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); - reg = REG_RA; - } - GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); - regs = (int)reg << 5; - regs |= (int)REG_R21; // REG_R21 - ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; - } - else + assert(reg != REG_R21); + ssize_t imm = op2->AsIntCon()->gtIconVal; + if (attr == EA_4BYTE) { - regs = (int)reg; - ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + assert(reg != REG_RA); + imm = (int32_t)imm; + GetEmitter()->emitIns_R_R_I(INS_slli_w, EA_4BYTE, REG_RA, reg, 0); + reg = REG_RA; } - - GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, imm); + regs = (int)reg << 5; + regs |= (int)REG_R21; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beq : INS_bne; } + else + { + regs = (int)reg; + ins = (tree->gtFlags & GTF_JCMP_EQ) ? INS_beqz : INS_bnez; + } + + GetEmitter()->emitIns_J(ins, compiler->compCurBB->bbJumpDest, regs); // 5-bits; } //--------------------------------------------------------------------- @@ -4886,8 +4824,6 @@ int CodeGenInterface::genSPtoFPdelta() const int delta; if (IsSaveFpRaWithAllCalleeSavedRegisters()) { - // delta = (compiler->compCalleeRegsPushed -2)* REGSIZE_BYTES + compiler->compLclFrameSize; - // assert(delta == genTotalFrameSize() - compiler->lvaArgSize - 2*8); delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? 
MAX_REG_ARG * REGSIZE_BYTES : 0) - (compiler->compCalleeRegsPushed - 1) * REGSIZE_BYTES; } @@ -4990,9 +4926,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, if (addr == nullptr) { // This is call to a runtime helper. - // li x, pAddr #NOTE: this maybe muti-instructions. - // ld x, [x] - // jr x + // li reg, pAddr #NOTE: this maybe muti-instructions. + // ld_d reg, reg + // jirl reg if (callTargetReg == REG_NA) { @@ -5009,10 +4945,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, callTarget = callTargetReg; - // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); - // GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); if (compiler->opts.compReloc) { + // TODO-LOONGARCH64: here the bl is special flag rather than a real instruction. GetEmitter()->emitIns_R_AI(INS_bl, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); } else @@ -5059,12 +4994,12 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, // TODO-CLEANUP Merge all versions of this function and move to new file simdcodegencommon.cpp. 
void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); return INS_OPTS_NONE; } @@ -5081,7 +5016,7 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType) // instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); return INS_invalid; } @@ -5096,7 +5031,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------------------------- @@ -5111,7 +5046,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //---------------------------------------------------------------------------------- @@ -5125,7 +5060,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5139,7 +5074,7 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5154,7 +5089,7 @@ void 
CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5169,7 +5104,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5184,7 +5119,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //-------------------------------------------------------------------------------- @@ -5198,7 +5133,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------------------ @@ -5212,7 +5147,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //------------------------------------------------------------------------------------ @@ -5226,7 +5161,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5249,7 +5184,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) // void 
CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5271,7 +5206,7 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5288,7 +5223,7 @@ void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) // void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5305,7 +5240,7 @@ void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) // void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //----------------------------------------------------------------------------- @@ -5321,7 +5256,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) // void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } #endif // FEATURE_SIMD @@ -5690,14 +5625,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForJumpCompare(treeNode->AsOp()); break; - case GT_JCC: - genCodeForJcc(treeNode->AsCC()); - break; - - case GT_SETCC: - genCodeForSetcc(treeNode->AsCC()); - break; - case GT_RETURNTRAP: genCodeForReturnTrap(treeNode->AsOp()); break; @@ -5865,7 +5792,7 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type) // The only TYP_REF constant that can come this path is a managed 'null' since it is not // relocatable. Other ref type constants (e.g. 
string objects) go through a different // code path. - noway_assert(type != TYP_REF || val == 0); + noway_assert((type != TYP_REF) || (val == 0)); GetEmitter()->emitIns_I_la(emitActualTypeSize(type), reg, val); regSet.verifyRegUsed(reg); @@ -5966,7 +5893,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } else { - // TODO-LOONGARCH64: should amend for optimize! + // TODO-LOONGARCH64: maybe optimize further! // GetEmitter()->emitIns_R_I(INS_pcaddu12i, EA_PTRSIZE, regGSConst, // (ssize_t)compiler->gsGlobalSecurityCookieAddr); // GetEmitter()->emitIns_R_R_I(INS_ldptr_d, EA_PTRSIZE, regGSConst, regGSConst, ); @@ -6002,7 +5929,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // void CodeGen::genIntrinsic(GenTree* treeNode) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } //--------------------------------------------------------------------- @@ -6068,7 +5995,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { if (varTypeIsSIMD(targetType)) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); } instruction storeIns = ins_Store(targetType); @@ -6569,7 +6496,7 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) if (treeNode->GetRegNum() != REG_NA) { - assert(!"unimplemented on LOONGARCH yet"); + NYI("unimplemented on LOONGARCH64 yet"); // Right now the only enregistrable multi-reg return types supported are SIMD types. assert(varTypeIsSIMD(treeNode)); assert(regCount != 0); @@ -7002,35 +6929,45 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // The index is never contained, even if it is a constant. assert(index->isUsedFromReg()); - // const regNumber tmpReg = node->GetSingleTempReg(); - // Generate the bounds check if necessary. 
if ((node->gtFlags & GTF_INX_RNGCHK) != 0) { GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R21, base->GetRegNum(), node->gtLenOffset); - // if (index >= REG_R21) - // { - // JumpToThrowHlpBlk; - // } + // if (index >= REG_R21) + // { + // JumpToThrowHlpBlk; + // } // - // sltu AT, index, REG_R21 - // bne AT, zero, RngChkExit + // sltu REG_R21, index, REG_R21 + // bne REG_21, zero, RngChkExit // IndRngFail: - // ... + // ... // RngChkExit: genJumpToThrowHlpBlk_la(SCK_RNGCHK_FAIL, INS_bgeu, index->GetRegNum(), node->gtIndRngFailBB, REG_R21); } emitAttr attr = emitActualTypeSize(node); - // Can we use a ScaledAdd instruction? + // Can we use a shift instruction for multiply ? // - if (isPow2(node->gtElemSize) && (node->gtElemSize <= 2048)) + if (isPow2(node->gtElemSize) && (node->gtElemSize < 0x10000000u)) { - DWORD scale; - BitScanForward(&scale, node->gtElemSize); + regNumber tmpReg; + if (node->gtElemSize == 0) + { + // dest = base + index + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, node->gtElemSize); - // dest = base + index * scale - genScaledAdd(emitActualTypeSize(node), node->GetRegNum(), base->GetRegNum(), index->GetRegNum(), scale); + // tmpReg = base + index << scale + // dest = base + tmpReg + GetEmitter()->emitIns_R_R_I(INS_slli_d, attr, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } + GetEmitter()->emitIns_R_R_R(INS_add_d, attr, node->GetRegNum(), base->GetRegNum(), tmpReg); } else // we have to load the element size and use a MADD (multiply-add) instruction { @@ -7218,7 +7155,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if (size >= 2 * REGSIZE_BYTES) { - regNumber tempReg2 = REG_R21; // cpBlkNode->ExtractTempReg(RBM_ALLINT);//TODO:should amend. 
+ regNumber tempReg2 = REG_R21; for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, srcOffset += regSize, dstOffset += regSize) @@ -7390,12 +7327,6 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); -#if 0 - inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), - /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); - - argReg = genRegArgNext(argReg); -#endif } } #if FEATURE_ARG_SPLIT @@ -7840,7 +7771,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (compiler->info.compIsVarArgs) { - assert(!"unimplemented on LOONGARCH yet!"); + NYI("unimplemented on LOONGARCH64 yet"); // In case of a jmp call to a vararg method ensure only integer registers are passed. assert((genRegMask(argReg) & (RBM_ARG_REGS)) != RBM_NONE); assert(!varDsc->lvIsHfaRegArg()); @@ -7963,14 +7894,14 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d instruction ins; if (castMaxValue > 2047) - { // should amend. should confirm !?!? + { assert((castMaxValue == 32767) || (castMaxValue == 65535)); GetEmitter()->emitIns_I_la(EA_ATTR(desc.CheckSrcSize()), REG_R21, castMaxValue + 1); ins = castMinValue == 0 ? INS_bgeu : INS_bge; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, reg, nullptr, REG_R21); } else - { // should amend. + { GetEmitter()->emitIns_R_R_I(INS_addi_w, EA_ATTR(desc.CheckSrcSize()), REG_R21, REG_R0, castMaxValue); ins = castMinValue == 0 ? 
INS_bltu : INS_blt; genJumpToThrowHlpBlk_la(SCK_OVERFLOW, ins, REG_R21, nullptr, reg); @@ -7978,7 +7909,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d if (castMinValue != 0) { - if ((-2048 <= castMinValue) && (castMinValue < 2048)) + if (isValidSimm12(castMinValue)) { GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); } @@ -8028,20 +7959,6 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) genIntCastOverflowCheck(cast, desc, srcReg); } - // if ((EA_ATTR(genTypeSize(srcType)) == EA_8BYTE) && (EA_ATTR(genTypeSize(dstType)) == EA_4BYTE)) - //{ - // if (dstType == TYP_INT) - // { - // // convert t0 int32 - // emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); - // } - // else - // { - // // convert t0 uint32 - // emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos+31, pos); - // } - //} - // else if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) if ((desc.ExtendKind() != GenIntCastDesc::COPY) || (srcReg != dstReg)) { instruction ins; @@ -8062,24 +7979,20 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) ins = (desc.ExtendSrcSize() == 1) ? INS_ext_w_b : INS_ext_w_h; emit->emitIns_R_R(ins, EA_PTRSIZE, dstReg, srcReg); break; -#ifdef TARGET_64BIT + case GenIntCastDesc::ZERO_EXTEND_INT: emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, dstReg, srcReg, pos + 31, pos); break; case GenIntCastDesc::SIGN_EXTEND_INT: emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); break; -#endif + default: assert(desc.ExtendKind() == GenIntCastDesc::COPY); -#if 1 if (srcType == TYP_INT) - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); // should amend. 
+ emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); else emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); -#else - emit->emitIns_R_R(INS_mov, EA_PTRSIZE, dstReg, srcReg); -#endif break; } } @@ -8195,7 +8108,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, { unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar; assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM); - const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); // TODO: unused. + const LclVarDsc* reversePInvokeFrameVar = compiler->lvaGetDesc(reversePInvokeFrameVarNumber); gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar->GetStackOffset()); } @@ -8207,62 +8120,6 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface } -/* TODO for LOONGARCH64: not used for loongarch */ -// clang-format off -const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32] -{ - //{ }, // NONE - //{ }, // 1 - //{ EJ_lt }, // SLT - //{ EJ_le }, // SLE - //{ EJ_ge }, // SGE - //{ EJ_gt }, // SGT - //{ EJ_mi }, // S - //{ EJ_pl }, // NS - - //{ EJ_eq }, // EQ - //{ EJ_ne }, // NE - //{ EJ_lo }, // ULT - //{ EJ_ls }, // ULE - //{ EJ_hs }, // UGE - //{ EJ_hi }, // UGT - //{ EJ_hs }, // C - //{ EJ_lo }, // NC - - //{ EJ_eq }, // FEQ - //{ EJ_gt, GT_AND, EJ_lo }, // FNE - //{ EJ_lo }, // FLT - //{ EJ_ls }, // FLE - //{ EJ_ge }, // FGE - //{ EJ_gt }, // FGT - //{ EJ_vs }, // O - //{ EJ_vc }, // NO - - //{ EJ_eq, GT_OR, EJ_vs }, // FEQU - //{ EJ_ne }, // FNEU - //{ EJ_lt }, // FLTU - //{ EJ_le }, // FLEU - //{ EJ_hs }, // FGEU - //{ EJ_hi }, // FGTU - //{ }, // P - //{ }, // NP -}; -// clang-format on - -//------------------------------------------------------------------------ -// inst_SETCC: Generate code to set a register to 0 or 1 based on a condition. 
-// -// Arguments: -// condition - The condition -// type - The type of the value to be produced -// dstReg - The destination register to be set to 1 or 0 -// -void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstReg) -{ - /* TODO for LOONGARCH64: should redesign and delete. */ - assert(!"unimplemented on LOONGARCH yet"); -} - //------------------------------------------------------------------------ // genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node. // @@ -8320,26 +8177,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) GetEmitter()->emitEnableGC(); } } -#if 1 -//------------------------------------------------------------------------ -// genScaledAdd: A helper for genLeaInstruction. -// TODO: can amend further. -void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale) -{ - emitter* emit = GetEmitter(); - if (scale == 0) - { - // target = base + index - emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, indexReg); - } - else - { - // target = base + index<emitIns_R_R_I(INS_slli_d, attr, REG_R21, indexReg, scale); - emit->emitIns_R_R_R(INS_add_d, attr, targetReg, baseReg, REG_R21); - } -} -#endif + //------------------------------------------------------------------------ // genLeaInstruction: Produce code for a GT_LEA node. 
// @@ -8353,14 +8191,9 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) emitAttr size = emitTypeSize(lea); int offset = lea->Offset(); - // In LOONGARCH we can only load addresses of the form: - // - // [Base + index*scale] - // [Base + Offset] - // [Literal] (PC-Relative) - // // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate: - // destReg = baseReg + indexReg * scale; + // tmpReg = indexReg << scale; + // destReg = baseReg + tmpReg; // destReg = destReg + offset; // // TODO-LOONGARCH64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture @@ -8372,59 +8205,50 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) GenTree* memBase = lea->Base(); GenTree* index = lea->Index(); - DWORD scale; - assert(isPow2(lea->gtScale)); - BitScanForward(&scale, lea->gtScale); - - assert(scale <= 4); - if (offset != 0) + regNumber tmpReg; + if (lea->gtScale == 0) { - regNumber tmpReg = lea->GetSingleTempReg(); + tmpReg = index->GetRegNum(); + } + else + { + DWORD scale; + BitScanForward(&scale, lea->gtScale); + assert(scale <= 4); - // When generating fully interruptible code we have to use the "large offset" sequence - // when calculating a EA_BYREF as we can't report a byref that points outside of the object - // - bool useLargeOffsetSeq = compiler->GetInterruptible() && (size == EA_BYREF); + emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_R21, index->GetRegNum(), scale); + tmpReg = REG_R21; + } - if (!useLargeOffsetSeq && ((-2048 <= offset) && (offset <= 2047))) + if (offset != 0) + { + if (isValidSimm12(offset)) { - // Generate code to set tmpReg = base + index*scale - genScaledAdd(size, tmpReg, memBase->GetRegNum(), index->GetRegNum(), scale); - - // Then compute target reg from [tmpReg + offset] - emit->emitIns_R_R_I(INS_addi_d, size, lea->GetRegNum(), tmpReg, offset); + emit->emitIns_R_R_I(INS_addi_d, size, tmpReg, tmpReg, offset); } - else // large offset sequence + else { - 
noway_assert(tmpReg != index->GetRegNum()); - noway_assert(tmpReg != memBase->GetRegNum()); - - // First load/store tmpReg with the offset constant - // rTmp = imm - GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + regNumber tmpReg2 = lea->GetSingleTempReg(); - // Then add the scaled index register - // rTmp = rTmp + index*scale - genScaledAdd(EA_PTRSIZE, tmpReg, tmpReg, index->GetRegNum(), scale); + noway_assert(tmpReg2 != index->GetRegNum()); + noway_assert(tmpReg2 != memBase->GetRegNum()); + noway_assert(tmpReg2 != tmpReg); - // Then compute target reg from [base + tmpReg ] - // rDst = base + rTmp - emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); + // compute the large offset. + emit->emitIns_I_la(EA_PTRSIZE, tmpReg2, offset); + emit->emitIns_R_R_R(INS_add_d, size, tmpReg, tmpReg, tmpReg2); } } - else - { - // Then compute target reg from [base + index*scale] - genScaledAdd(size, lea->GetRegNum(), memBase->GetRegNum(), index->GetRegNum(), scale); - } + + emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); } else if (lea->Base()) { GenTree* memBase = lea->Base(); - if ((-2048 <= offset) && (offset <= 2047)) + if (isValidSimm12(offset)) { if (offset != 0) { @@ -8445,7 +8269,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) regNumber tmpReg = lea->GetSingleTempReg(); // First load tmpReg with the large offset constant - GetEmitter()->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); + emit->emitIns_I_la(EA_PTRSIZE, tmpReg, offset); // Then compute target reg from [memBase + tmpReg] emit->emitIns_R_R_R(INS_add_d, size, lea->GetRegNum(), memBase->GetRegNum(), tmpReg); @@ -8481,7 +8305,7 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) } else { - assert((-2048 <= delta) && (delta < 2048)); + assert(isValidSimm12(delta)); GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); } @@ -8705,7 +8529,7 @@ inline void 
CodeGen::genJumpToThrowHlpBlk_la( callType = emitter::EC_INDIR_R; callTarget = REG_DEFAULT_HELPER_CALL_TARGET; - // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl.//TODO: maybe optimize. + // ssize_t imm = (4 + 1 + 1) << 2;// 4=li, 1=ld, 1=jirl. // instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr); // emit->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, callTarget, callTarget, 0); @@ -8771,7 +8595,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) } #endif // DEBUG - // TODO: Use the exact barrier type depending on the CPU. + // TODO-LOONGARCH64: Use the exact barrier type depending on the CPU. GetEmitter()->emitIns_I(INS_dbar, EA_4BYTE, INS_BARRIER_FULL); } @@ -8883,7 +8707,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // Notes: // 1. FP is always saved, and the first store is FP, RA. // 2. General-purpose registers are 8 bytes, floating-point registers are 8 bytes, but SIMD/FP registers 16 bytes. - // TODO: supporting SIMD feature ! + // TODO-LOONGARCH64: supporting SIMD feature ! // 3. For frames with varargs, not implemented completely and not tested ! // 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). // @@ -8903,7 +8727,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // | incoming arguments | // +=======================+ <---- Caller's SP // | Arguments Or | // if needed. 
- // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) // |-----------------------| // |Callee saved registers | // not including FP/RA; multiple of 8 bytes // |-----------------------| @@ -8932,7 +8756,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // | incoming arguments | // +=======================+ <---- Caller's SP // | Arguments Or | // if needed. - // | Varargs regs space | // Only for varargs functions; 64 bytes (TODO: not implement completely) + // | Varargs regs space | // Only for varargs functions; (varargs not implemented for LoongArch64) // |-----------------------| // | Saved RA | // 8 bytes // |-----------------------| @@ -9540,7 +9364,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += baseOffset; - if ((-2048 <= base) && (base < 2048)) + if (isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9586,7 +9410,7 @@ void CodeGen::genFnPrologCalleeRegArgs() { base += baseOffset; - if ((-2048 <= base) && (base < 2048)) + if (isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9617,7 +9441,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += 8; GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); - if ((-2048 <= base) && (base < 2048)) + if (isValidSimm12(base)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, baseOffset); } diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 2a19eb10bacec..fae914498d6e8 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -758,11 +758,8 @@ class emitter #elif defined(TARGET_ARM64) // For Arm64, we have used 17 bits from the second DWORD. 
#define ID_EXTRA_BITFIELD_BITS (17) -#elif defined(TARGET_XARCH) -// For xarch, we have used 14 bits from the second DWORD. -#define ID_EXTRA_BITFIELD_BITS (14) -#elif defined(TARGET_LOONGARCH64) - // For Loongarch64, we have used 14 bits from the second DWORD. +#elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) +// For xarch and LoongArch64, we have used 14 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -1052,7 +1049,7 @@ class emitter } #elif defined(TARGET_LOONGARCH64) - unsigned idCodeSize() const + unsigned idCodeSize() const { return _idCodeSize; //_idInsCount; } diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h index e04d60270d567..2f47160ac8d39 100644 --- a/src/coreclr/jit/emitfmtsloongarch64.h +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -11,16 +11,13 @@ #ifdef DEFINE_ID_OPS ////////////////////////////////////////////////////////////////////////////// -#undef DEFINE_ID_OPS enum ID_OPS { ID_OP_NONE, // no additional arguments - ID_OP_SCNS, // small const operand (21-bits or less, no reloc) - ID_OP_JMP, // local jump - ID_OP_CALL, // method call - ID_OP_SPEC, // special handling required }; +#undef DEFINE_ID_OPS + ////////////////////////////////////////////////////////////////////////////// #else // !DEFINE_ID_OPS ////////////////////////////////////////////////////////////////////////////// @@ -38,13 +35,6 @@ enum ID_OPS IF_DEF(NONE, IS_NONE, NONE) // - -//IF_DEF(LABEL, IS_NONE, JMP) // label -//IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op (cond branch + uncond branch) -//IF_DEF(LARGEADR, IS_NONE, JMP) // large address pseudo-op (adrp + add) -//IF_DEF(LARGELDC, IS_NONE, JMP) // large constant pseudo-op (adrp + ldr) - - IF_DEF(OPCODE, IS_NONE, NONE) IF_DEF(OPCODES_16, IS_NONE, NONE) IF_DEF(OP_FMT, IS_NONE, NONE) diff --git a/src/coreclr/jit/emitjmps.h b/src/coreclr/jit/emitjmps.h index 
97e216dccbb60..cd10727f6eec3 100644 --- a/src/coreclr/jit/emitjmps.h +++ b/src/coreclr/jit/emitjmps.h @@ -48,6 +48,7 @@ JMP_SMALL(le , gt , ble ) // LE #elif defined(TARGET_LOONGARCH64) +// TODO-LOONGARCH64: adding other condition branches. JMP_SMALL(jmp , jmp , b ) JMP_SMALL(eq , ne , beq ) // EQ JMP_SMALL(ne , eq , bne ) // NE diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 85841251de82a..0d9b5da867eb4 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -78,142 +78,12 @@ bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src bool IsRedundantLdStr( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. -/************************************************************************ -* -* This union is used to to encode/decode the special LOONGARCH64 immediate values -* that is listed as imm(N,r,s) and referred to as 'bitmask immediate' -*/ - -union bitMaskImm { - struct - { - unsigned immS : 6; // bits 0..5 - unsigned immR : 6; // bits 6..11 - unsigned immN : 1; // bits 12 - }; - unsigned immNRS; // concat N:R:S forming a 13-bit unsigned immediate -}; - -/************************************************************************ -* -* Convert between a 64-bit immediate and its 'bitmask immediate' -* representation imm(i16,hw) -*/ - -// static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size); - -// static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size); - -/************************************************************************ -* -* This union is used to to encode/decode the special LOONGARCH64 immediate values -* that is listed as imm(i16,hw) and referred to as 'halfword immediate' -*/ - -union halfwordImm { - struct - { - unsigned immVal : 16; // bits 0..15 - unsigned immHW : 2; // bits 16..17 - }; - unsigned immHWVal; // concat HW:Val forming a 18-bit unsigned 
immediate -}; - -/************************************************************************ -* -* Convert between a 64-bit immediate and its 'halfword immediate' -* representation imm(i16,hw) -*/ - -// static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size); - -// static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size); - -/************************************************************************ -* -* This union is used to encode/decode the special LOONGARCH64 immediate values -* that is listed as imm(i16,by) and referred to as 'byteShifted immediate' -*/ - -union byteShiftedImm { - struct - { - unsigned immVal : 8; // bits 0..7 - unsigned immBY : 2; // bits 8..9 - unsigned immOnes : 1; // bit 10 - }; - unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate -}; - -/************************************************************************ -* -* Convert between a 16/32-bit immediate and its 'byteShifted immediate' -* representation imm(i8,by) -*/ - -// static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL); - -// static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size); - -/************************************************************************ -* -* This union is used to to encode/decode the special LOONGARCH64 immediate values -* that are use for FMOV immediate and referred to as 'float 8-bit immediate' -*/ - -union floatImm8 { - struct - { - unsigned immMant : 4; // bits 0..3 - unsigned immExp : 3; // bits 4..6 - unsigned immSign : 1; // bits 7 - }; - unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate -}; - -/************************************************************************ -* -* Convert between a double and its 'float 8-bit immediate' representation -*/ - -// static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); - -// static double emitDecodeFloatImm8(const emitter::floatImm8 
fpImm); - -/************************************************************************ -* -* This union is used to to encode/decode the cond, nzcv and imm5 values for -* instructions that use them in the small constant immediate field -*/ - -union condFlagsImm { - struct - { - // insCond cond : 4; // bits 0..3 - // insCflags flags : 4; // bits 4..7 - unsigned imm5 : 5; // bits 8..12 - }; - unsigned immCFVal; // concat imm5:flags:cond forming an 13-bit unsigned immediate -}; - -// Returns true if 'reg' represents an integer register. -static bool isIntegerRegister(regNumber reg) -{ - return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); -} - // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; -// Returns true if 'value' is a legal signed immediate 16 bit encoding. -static bool isValidSimm16(ssize_t value) -{ - return -(((int)1) << 15) <= value && value < (((int)1) << 15); -}; - // Returns true if 'value' is a legal signed immediate 20 bit encoding. 
static bool isValidSimm20(ssize_t value) { @@ -311,8 +181,6 @@ void emitIns_R_R_I_I( void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); -// void emitIns_BARR(instruction ins, insBarrier barrier); - void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); @@ -320,9 +188,6 @@ void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); void emitIns_S_S_R_R( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); -// void emitIns_R_R_S( -// instruction ins, emitAttr attr, regNumber ireg, regNumber ireg2, int sa); - void emitIns_R_R_S_S( instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); @@ -402,12 +267,6 @@ void emitIns_Call(EmitCallType callType, bool isJump = false); unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code); -// BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i); -// BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id); -// BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id); -// BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg); -// BYTE* emitOutputShortConstant( -// BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize); unsigned get_curTotalCodeSize(); // bytes of code From 5f896d5b7653e789ed45180789daeb42de7a4a5f Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 21 Feb 2022 14:33:42 +0800 Subject: [PATCH 26/46] [LoongArch64] add compiling the `clrjit_unix_loongarch64_*`. 
--- src/coreclr/gcinfo/CMakeLists.txt | 5 +++-- src/coreclr/jit/CMakeLists.txt | 8 +++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index 8c966bb3403b5..5f10c54e5d9f9 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -69,11 +69,12 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) create_gcinfo_lib(TARGET gcinfo_${TARGET_OS_NAME}_${ARCH_TARGET_NAME} OS ${TARGET_OS_NAME} ARCH ${ARCH_TARGET_NAME}) endif() -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) create_gcinfo_lib(TARGET gcinfo_universal_arm64 OS universal ARCH arm64) create_gcinfo_lib(TARGET gcinfo_unix_x64 OS unix ARCH x64) create_gcinfo_lib(TARGET gcinfo_win_x64 OS win ARCH x64) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index caf0726d970b3..a91c645898d18 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -576,14 +576,12 @@ install_clr(TARGETS clrjit DESTINATIONS . 
sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -#if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) - #create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) -#endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) From d4a47ffba185e84189eb22b08f1cf1895143080d Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 23 Feb 2022 13:06:00 +0800 Subject: [PATCH 27/46] [LoongArch64] delete unused code and amend the format. Also amend inst_Mov. 
--- src/coreclr/jit/codegen.h | 3 + src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/emit.cpp | 17 +- src/coreclr/jit/emit.h | 31 +- src/coreclr/jit/emitloongarch64.cpp | 491 +-------------- src/coreclr/jit/emitloongarch64.h | 49 -- src/coreclr/jit/instr.cpp | 9 + src/coreclr/jit/instrsloongarch64.h | 829 ++++++++++++------------- src/coreclr/jit/lclvars.cpp | 60 +- src/coreclr/jit/lower.cpp | 23 +- src/coreclr/jit/lowerloongarch64.cpp | 773 +---------------------- src/coreclr/jit/lsraloongarch64.cpp | 14 +- src/coreclr/pal/inc/rt/ntimage.h | 1 + 14 files changed, 525 insertions(+), 1779 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 61732b9f007f2..89d8ba379b124 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1526,7 +1526,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs); +#ifndef TARGET_LOONGARCH64 + // Now this is only used on xarch. 
void inst_TT(instruction ins, GenTree* tree, unsigned offs = 0, int shfv = 0, emitAttr size = EA_UNKNOWN); +#endif void inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumber reg); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 08db8279bb7d1..5900d163d4c41 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6029,7 +6029,7 @@ void CodeGen::genFnProlog() #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 if (!isOSRx64Root) { genPushCalleeSavedRegisters(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 2d856550a5486..cfc8b6587cd3e 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -2191,7 +2191,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode) if (dataReg != targetReg) { // Assign into targetReg when dataReg (from op1) is not the same register - inst_Mov(targetType, targetReg, dataReg, true); + inst_Mov(targetType, targetReg, dataReg, true, emitActualTypeSize(targetType)); } genProduceReg(lclNode); } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 574b66263beb6..c371a33f03cda 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -795,21 +795,13 @@ insGroup* emitter::emitSavIG(bool emitAdd) } #endif -// Record how many instructions and bytes of code this group contains + // Record how many instructions and bytes of code this group contains -#ifdef TARGET_LOONGARCH64 - noway_assert((unsigned int)emitCurIGinsCnt == emitCurIGinsCnt); -#else noway_assert((BYTE)emitCurIGinsCnt == emitCurIGinsCnt); -#endif noway_assert((unsigned short)emitCurIGsize == emitCurIGsize); -#ifdef TARGET_LOONGARCH64 - ig->igInsCnt = (unsigned int)emitCurIGinsCnt; -#else ig->igInsCnt = 
(BYTE)emitCurIGinsCnt; -#endif - ig->igSize = (unsigned short)emitCurIGsize; + ig->igSize = (unsigned short)emitCurIGsize; emitCurCodeOffset += emitCurIGsize; assert(IsCodeAligned(emitCurCodeOffset)); @@ -1178,11 +1170,6 @@ void emitter::emitBegFN(bool hasFramePtr ig->igNext = nullptr; -//#ifdef TARGET_LOONGARCH64 -// On future maybe use this. -// ig->igJmpCnt = 0; -//#endif - #ifdef DEBUG emitScratchSigInfo = nullptr; #endif // DEBUG diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index fae914498d6e8..5008f79e680f9 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -303,12 +303,8 @@ struct insGroup #if EMIT_TRACK_STACK_DEPTH unsigned igStkLvl; // stack level on entry #endif - regMaskSmall igGCregs; // set of registers with live GC refs -#ifdef TARGET_LOONGARCH64 - unsigned int igInsCnt; // # of instructions in this group -#else + regMaskSmall igGCregs; // set of registers with live GC refs unsigned char igInsCnt; // # of instructions in this group -#endif #else // REGMASK_BITS @@ -598,20 +594,19 @@ class emitter static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; #elif defined(TARGET_LOONGARCH64) - /* TODO: not include SIMD-vector. */ + // TODO-LoongArch64: not include SIMD-vector. static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; -#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) +#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) // The format for the instruction #if defined(TARGET_XARCH) static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) - // insFormat _idInsFmt : 5;// NOTE: LOONGARCH64 does not used the _idInsFmt . unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. 
If not enough, please use the // _idInsCount. // unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. @@ -683,13 +678,11 @@ class emitter opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32 // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. -#elif defined(TARGET_ARM64) -// Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) /* _idOpSize defined bellow. */ -#else // ARM - opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 -#endif // ARM +#else + opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 +#endif // ARM || TARGET_LOONGARCH64 // On Amd64, this is where the second DWORD begins // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that @@ -738,7 +731,7 @@ class emitter #endif #ifdef TARGET_LOONGARCH64 - /* TODO: for LOONGARCH: maybe delete on future. */ + // TODO-LoongArch64: maybe delete on future. opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16 insOpts _idInsOpt : 6; // loongarch options for special: placeholders. e.g emitIns_R_C, also identifying the // accessing a local on stack. @@ -759,7 +752,7 @@ class emitter // For Arm64, we have used 17 bits from the second DWORD. #define ID_EXTRA_BITFIELD_BITS (17) #elif defined(TARGET_XARCH) || defined(TARGET_LOONGARCH64) -// For xarch and LoongArch64, we have used 14 bits from the second DWORD. + // For xarch and LoongArch64, we have used 14 bits from the second DWORD. 
#define ID_EXTRA_BITFIELD_BITS (14) #else #error Unsupported or unset target architecture @@ -916,7 +909,7 @@ class emitter regNumber _idReg3 : REGNUM_BITS; regNumber _idReg4 : REGNUM_BITS; }; -#elif defined(TARGET_LOONGARCH64) // TARGET_XARCH +#elif defined(TARGET_LOONGARCH64) struct { unsigned int iiaEncodedInstr; // instruction's binary encoding. @@ -947,7 +940,7 @@ class emitter { return iiaJmpOffset; } -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_LOONGARCH64) } _idAddrUnion; diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 2d260ffed00ac..dbf81a1e207fe 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -455,20 +455,8 @@ const emitJumpKind emitReverseJumpKinds[] = { /*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins) { - assert(!"unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("emitInsToJumpKind-----unimplemented on LOONGARCH64 yet----"); return EJ_NONE; -#if 0 - for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++) - { - if (ins == emitJumpKindInstructions[i]) - { - emitJumpKind ret = (emitJumpKind)i; - assert(EJ_NONE < ret && ret < EJ_COUNT); - return ret; - } - } - unreached(); -#endif } /***************************************************************************** @@ -1006,50 +994,6 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction referencing a single register. 
- */ - -void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - code_t code = emitInsCode(ins); - -#ifdef DEBUG -#endif - switch (ins) - { - case INS_jr: - case INS_jr_hb: - case INS_mthi: - case INS_mtlo: - code |= (reg & 0x1f)<<21;//rs - break; - - case INS_mfhi://mfhi - case INS_mflo: - code |= (reg & 0x1f)<<11;//rd - assert(isGeneralRegister(reg)); - break; - - default: - unreached(); - } - - instrDesc* id = emitNewInstr(attr); - - id->idIns(ins); - id->idReg1(reg); - id->idAddr()->iiaSetInstrEncode(code); - - id->idCodeSize(4); - //dispIns(id); - appendToCurIG(id); -#endif -} - /***************************************************************************** * * Add an instruction referencing a register and a constant. @@ -1058,7 +1002,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) { code_t code = emitInsCode(ins); - //#ifdef DEBUG + switch (ins) { case INS_lu12i_w: @@ -1115,7 +1059,6 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t unreached(); break; } // end switch (ins) - //#endif instrDesc* id = emitNewInstr(attr); @@ -1128,9 +1071,6 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t appendToCurIG(id); } -// NOTEADD:This function is new in emitarm64.cpp,so it be added to emitloongarch.cpp. -// But I don't konw how to change it so that it can be used on LA. -// I just add a statement "assert(!"unimplemented on LOONGARCH yet");". 
//------------------------------------------------------------------------ // emitIns_Mov: Emits a move instruction // @@ -1144,11 +1084,16 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t // void emitter::emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{ // TODO: should amend for LoongArch64/LOONGARCH64. +{ // TODO-LoongArch64: should amend for LoongArch64/LOONGARCH64. assert(IsMovInstruction(ins)); if (!canSkip || (dstReg != srcReg)) - emitIns_R_R(ins, attr, dstReg, srcReg); + { + if ((EA_4BYTE == attr) && (INS_mov == ins)) + emitIns_R_R_I(INS_slli_w, attr, dstReg, srcReg, 0); + else + emitIns_R_R(ins, attr, dstReg, srcReg); + } } /***************************************************************************** @@ -1313,42 +1258,6 @@ void emitter::emitIns_R_R( appendToCurIG(id); } -void emitter::emitIns_R_I_I( - instruction ins, emitAttr attr, regNumber reg, ssize_t hint, ssize_t off, insOpts opt /* = INS_OPTS_NONE */) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 -#ifdef DEBUG - switch (ins) - { - case INS_pref: - assert(isGeneralRegister(reg)); - assert((-32769 < off) && (off < 32768)); - break; - - default: - unreached(); - } -#endif - code_t code = emitInsCode(ins); - - code |= (hint & 0x1f)<<16; //hint - code |= (reg & 0x1f)<<21; //rs or base - code |= (off & 0xffff); //offset - - ssize_t imms[] = {hint, off}; - instrDesc* id = emitNewInstr(attr); - - id->idIns(ins); - id->idReg1(reg); - id->idAddr()->iiaSetInstrEncode(code); - - id->idCodeSize(4); - //dispIns(id); - appendToCurIG(id); -#endif -} - /***************************************************************************** * * Add an instruction referencing two registers and a constant. 
@@ -1921,23 +1830,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, appendToCurIG(id); } -#if 1 -/***************************************************************************** - * - * Add an instruction referencing three registers, with an extend option - */ - -void emitter::emitIns_R_R_R_Ext(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - insOpts opt, /* = INS_OPTS_NONE */ - int shiftAmount) /* = -1 -- unset */ -{ - assert(!"unimplemented on LOONGARCH yet"); -} - /***************************************************************************** * * Add an instruction referencing two registers and two constants. @@ -2031,97 +1923,6 @@ void emitter::emitIns_R_R_R_R( appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction with a static data member operand. If 'size' is 0, the - * instruction operates on the address of the static member instead of its - * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). - */ - -void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_C"); -#endif -} - -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable. - */ - -void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_S"); -#endif -} - -#if 0 -/***************************************************************************** - * - * Add an instruction referencing a register and a stack-based local variable. 
- */ - -void emitter::emitIns_R_R_S( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int sa) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 1 - regNumber regs[] = {reg1, reg2}; - ssize_t imm = (ssize_t)sa; - emitAllocInstrOnly(emitInsOps(ins, regs, &imm), attr); -#else - instrDesc* id = emitNewInstrCns(attr, sa); - insFormat fmt = IF_FMT_FUNC; - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - - id->idReg1(reg1); - id->idReg2(reg2); - - //dispIns(id); - appendToCurIG(id); -#endif -} -#endif - -/***************************************************************************** - * - * Add an instruction referencing two register and consectutive stack-based local variable slots. - */ -void emitter::emitIns_R_R_S_S( - instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -/***************************************************************************** - * - * Add an instruction referencing consecutive stack-based local variable slots and two registers - */ -void emitter::emitIns_S_S_R_R( - instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -} - -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable and an immediate - */ -void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_S_I"); -#endif -} - /***************************************************************************** * * Add an instruction with a register + static member operands. @@ -2144,7 +1945,7 @@ void emitter::emitIns_R_C( // pcaddu12i reg, off-hi-20bits // load reg, offs_lo-12bits(reg) #when ins is load ins. // - // INS_OPTS_RC: ins == bl placeholders. 
3-ins: ////TODO: maybe optimize. + // INS_OPTS_RC: ins == bl placeholders. 3-ins: ////TODO-LoongArch64: maybe optimize. // lu12i_w reg, addr-hi-20bits // ori reg, reg, addr-lo-12bits // lu32i_d reg, addr_hi-32bits @@ -2168,7 +1969,7 @@ void emitter::emitIns_R_C( id->idCodeSize(8); } else - id->idCodeSize(12); // TODO: maybe optimize. + id->idCodeSize(12); // TODO-LoongArch64: maybe optimize. if (EA_IS_GCREF(attr)) { @@ -2183,7 +1984,7 @@ void emitter::emitIns_R_C( id->idOpSize(EA_PTRSIZE); } - // TODO: this maybe deleted. + // TODO-LoongArch64: this maybe deleted. id->idSetIsBound(); // We won't patch address since we will know the exact distance // once JIT code and data are allocated together. @@ -2195,38 +1996,9 @@ void emitter::emitIns_R_C( appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction with a static member + constant. - */ - -void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_C_I"); -#endif -} - -/***************************************************************************** - * - * Add an instruction with a static member + register operands. - */ - -void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - assert(!"emitIns_C_R not supported for RyuJIT backend"); -#endif -} - void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_AR"); -#endif + NYI_LOONGARCH64("emitIns_R_AR-----unimplemented/unused on LOONGARCH64 yet----"); } // This computes address from the immediate which is relocatable. 
@@ -2275,54 +2047,6 @@ void emitter::emitIns_R_AI(instruction ins, appendToCurIG(id); } -void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_AR_R"); -#endif -} - -void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_ARR"); -#endif -} - -void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_ARR"); -#endif -} - -void emitter::emitIns_R_ARX( - instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) -{ - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NYI("emitIns_R_ARR"); -#endif -} - -/***************************************************************************** - * - * Add a data label instruction. - */ -void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg) -{ - NYI("emitIns_R_D"); -} - -void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm) -{ - assert(!"unimplemented on LOONGARCH yet"); -} -#endif - /***************************************************************************** * * Record that a jump instruction uses the short encoding @@ -2330,8 +2054,8 @@ void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, reg */ void emitter::emitSetShortJump(instrDescJmp* id) { - /* TODO: maybe delete it on future. */ - return; + // TODO-LoongArch64: maybe delete it on future. 
+ NYI_LOONGARCH64("emitSetShortJump-----unimplemented/unused on LOONGARCH64 yet----"); } /***************************************************************************** @@ -2395,7 +2119,7 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) { - assert(!"unimplemented on LOONGARCH yet: emitIns_J_R."); // not used. + NYI_LOONGARCH64("emitIns_J_R-----unimplemented/unused on LOONGARCH64 yet----"); } // NOTE: @@ -2466,7 +2190,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) id->idjShort = false; - ////TODO: maybe deleted this for loongarch64. + // TODO-LoongArch64: maybe deleted this. id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); #ifdef DEBUG if (emitComp->opts.compLongAddress) // Force long branches @@ -2496,7 +2220,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) // void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1, regNumber reg2) { - // TODO: + // TODO-LoongArch64: // Now the emitIns_J_cond_la() is only the short condition branch. // There is no long condition branch for loongarch64 so far. // For loongarch64, the long condition branch is like this: @@ -2734,7 +2458,7 @@ void emitter::emitIns_Call(EmitCallType callType, id->idIns(ins); id->idInsOpt(INS_OPTS_C); - // TODO: maybe optimize. + // TODO-LoongArch64: maybe optimize. // INS_OPTS_C: placeholders. 1/2/4-ins: // if (callType == EC_INDIR_R) @@ -2895,7 +2619,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t } else { - // lu12i_w t2, dst_offset_lo32-hi //TODO: maybe optimize. + // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. 
// ori t2, t2, dst_offset_lo32-lo // lu32i_d t2, dst_offset_hi32-lo // jirl t2 @@ -3338,7 +3062,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // pcaddu12i reg, offset-hi20 // addi_d reg, reg, offset-lo12 // - // else: ////TODO:optimize. + // else: // TODO-LoongArch64:optimize. // lu12i_w reg, dst-hi-12bits // ori reg, reg, dst-lo-12bits // lu32i_d reg, dst-hi-32bits @@ -3410,7 +3134,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // bnez/beqz dst | b dst | b dst //_next: // - // case_2: <---------- TODO: from INS_OPTS_J: + // case_2: <---------- TODO-LoongArch64: from INS_OPTS_J: // bnez/beqz _next: // pcaddi r21,off-hi // jirl r0,r21,off-lo @@ -3439,7 +3163,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { regNumber reg2 = id->idReg2(); assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); - // assert((INS_bceqz <= ins) && (ins <= INS_bl));//TODO + // assert((INS_bceqz <= ins) && (ins <= INS_bl)); // TODO-LoongArch64 if ((INS_beq == ins) || (INS_bne == ins)) { if ((-0x400000 <= imm) && (imm < 0x400000)) @@ -3766,12 +3490,6 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) goto Label_OPCODE_0; // break; } - // case 0x1: - //{ - // assert(!"unimplemented on loongarch yet!"); - // //goto Label_OPCODE_1; - // break; - //} case 0x2: { goto Label_OPCODE_2; @@ -6129,7 +5847,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) { - assert(!"unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("emitInsBinary-----unimplemented on LOONGARCH64 yet----"); return REG_R0; } @@ -6382,10 +6100,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (attr == EA_4BYTE) emitIns_R_R_I_I(INS_bstrins_d, EA_8BYTE, dst->GetRegNum(), REG_R0, 63, 32); - // else - //{ - // assert(!"unimplemented on LOONGARCH yet: ulong * ulong 
!!!"); - //} } if (needCheckOv) @@ -6586,7 +6300,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins { insExecutionCharacteristics result; - // TODO: support this function for LoongArch64. + // TODO-LoongArch64: support this function. result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; result.insLatency = PERFSCORE_LATENCY_ZERO; result.insMemoryAccessKind = PERFSCORE_MEMORY_NONE; @@ -6681,95 +6395,8 @@ bool emitter::IsMovInstruction(instruction ins) bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) { - assert(!"unimplemented on LOONGARCH yet"); - return false; -#if 0 - assert(ins == INS_mov); - - if (canSkip && (dst == src)) - { - // These elisions used to be explicit even when optimizations were disabled - return true; - } - - if (!emitComp->opts.OptimizationEnabled()) - { - // The remaining move elisions should only happen if optimizations are enabled - return false; - } - - if (dst == src) - { - // A mov with a EA_4BYTE has the side-effect of clearing the upper bits - // So only eliminate mov instructions that are not clearing the upper bits - // - if (isGeneralRegisterOrSP(dst) && (size == EA_8BYTE)) - { - JITDUMP("\n -- suppressing mov because src and dst is same 8-byte register.\n"); - return true; - } - else if (isVectorRegister(dst) && (size == EA_16BYTE)) - { - JITDUMP("\n -- suppressing mov because src and dst is same 16-byte register.\n"); - return true; - } - } - - bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); - - if (!isFirstInstrInBlock && // Don't optimize if instruction is not the first instruction in IG. - (emitLastIns != nullptr) && - (emitLastIns->idIns() == INS_mov) && // Don't optimize if last instruction was not 'mov'. - (emitLastIns->idOpSize() == size)) // Don't optimize if operand size is different than previous instruction. 
- { - // Check if we did same move in prev instruction except dst/src were switched. - regNumber prevDst = emitLastIns->idReg1(); - regNumber prevSrc = emitLastIns->idReg2(); - insFormat lastInsfmt = emitLastIns->idInsFmt(); - - // Sometimes emitLastIns can be a mov with single register e.g. "mov reg, #imm". So ensure to - // optimize formats that does vector-to-vector or scalar-to-scalar register movs. - // - const bool isValidLastInsFormats = - ((lastInsfmt == IF_DV_3C) || (lastInsfmt == IF_DR_2G) || (lastInsfmt == IF_DR_2E)); - - if (isValidLastInsFormats && (prevDst == dst) && (prevSrc == src)) - { - assert(emitLastIns->idOpSize() == size); - JITDUMP("\n -- suppressing mov because previous instruction already moved from src to dst register.\n"); - return true; - } - - if ((prevDst == src) && (prevSrc == dst) && isValidLastInsFormats) - { - // For mov with EA_8BYTE, ensure src/dst are both scalar or both vector. - if (size == EA_8BYTE) - { - if (isVectorRegister(src) == isVectorRegister(dst)) - { - JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst " - "to src register.\n"); - return true; - } - } - - // For mov with EA_16BYTE, both src/dst will be vector. - else if (size == EA_16BYTE) - { - assert(isVectorRegister(src) && isVectorRegister(dst)); - assert(lastInsfmt == IF_DV_3C); - - JITDUMP("\n -- suppressing mov because previous instruction already did an opposite move from dst to " - "src register.\n"); - return true; - } - - // For mov of other sizes, don't optimize because it has side-effect of clearing the upper bits. 
- } - } - + NYI_LOONGARCH64("IsRedundantMov-----unimplemented on LOONGARCH64 yet----"); return false; -#endif } //---------------------------------------------------------------------------------------- @@ -6798,71 +6425,7 @@ bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regN bool emitter::IsRedundantLdStr( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) { - assert(!"unimplemented on LOONGARCH yet"); - return false; -#if 0 - bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); - - if (((ins != INS_ldr) && (ins != INS_str)) || (isFirstInstrInBlock) || (emitLastIns == nullptr)) - { - return false; - } - - regNumber prevReg1 = emitLastIns->idReg1(); - regNumber prevReg2 = emitLastIns->idReg2(); - insFormat lastInsfmt = emitLastIns->idInsFmt(); - emitAttr prevSize = emitLastIns->idOpSize(); - ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns(); - - // Only optimize if: - // 1. "base" or "base plus immediate offset" addressing modes. - // 2. Addressing mode matches with previous instruction. - // 3. The operand size matches with previous instruction - if (((fmt != IF_LS_2A) && (fmt != IF_LS_2B)) || (fmt != lastInsfmt) || (prevSize != size)) - { - return false; - } - - if ((ins == INS_ldr) && (emitLastIns->idIns() == INS_str)) - { - // If reg1 is of size less than 8-bytes, then eliminating the 'ldr' - // will not zero the upper bits of reg1. - - // Make sure operand size is 8-bytes - // str w0, [x1, #4] - // ldr w0, [x1, #4] <-- can't eliminate because upper-bits of x0 won't get set. 
- if (size != EA_8BYTE) - { - return false; - } - - if ((prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm)) - { - JITDUMP("\n -- suppressing 'ldr reg%u [reg%u, #%u]' as previous 'str reg%u [reg%u, #%u]' was from same " - "location.\n", - reg1, reg2, imm, prevReg1, prevReg2, prevImm); - return true; - } - } - else if ((ins == INS_str) && (emitLastIns->idIns() == INS_ldr)) - { - // Make sure src and dst registers are not same. - // ldr x0, [x0, #4] - // str x0, [x0, #4] <-- can't eliminate because [x0+3] is not same destination as previous source. - // Note, however, that we can not eliminate store in the following sequence - // ldr wzr, [x0, #4] - // str wzr, [x0, #4] - // since load operation doesn't (and can't) change the value of its destination register. - if ((reg1 != reg2) && (prevReg1 == reg1) && (prevReg2 == reg2) && (imm == prevImm) && (reg1 != REG_ZR)) - { - JITDUMP("\n -- suppressing 'str reg%u [reg%u, #%u]' as previous 'ldr reg%u [reg%u, #%u]' was from same " - "location.\n", - reg1, reg2, imm, prevReg1, prevReg2, prevImm); - return true; - } - } - + NYI_LOONGARCH64("IsRedundantLdStr-----unimplemented on LOONGARCH64 yet----"); return false; -#endif } #endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 0d9b5da867eb4..2c9cbfd211bf1 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -70,7 +70,6 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr // Emit the 32-bit LOONGARCH64 instruction 'code' into the 'dst' buffer unsigned emitOutput_Instr(BYTE* dst, code_t code); -// NOTEADD: New functions in emitarm64.h // Method to do check if mov is redundant with respect to the last instruction. // If yes, the caller of this method can choose to omit current mov instruction. 
static bool IsMovInstruction(instruction ins); @@ -104,7 +103,6 @@ inline static unsigned getBitWidth(emitAttr size) inline static bool isGeneralRegister(regNumber reg) { - // Excludes REG_R0 ?? return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST); } @@ -131,14 +129,8 @@ void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int o void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); void emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t offs); -void emitIns_R_I_I( - instruction ins, emitAttr attr, regNumber reg1, ssize_t hint, ssize_t off, insOpts opt = INS_OPTS_NONE); - -void emitIns_R(instruction ins, emitAttr attr, regNumber reg); - void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE); -// NOTEADD: NEW function in emitarm64. void emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt = INS_OPTS_NONE); @@ -167,32 +159,11 @@ void emitIns_R_R_R_I(instruction ins, insOpts opt = INS_OPTS_NONE, emitAttr attrReg2 = EA_UNKNOWN); -void emitIns_R_R_R_Ext(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - insOpts opt = INS_OPTS_NONE, - int shiftAmount = -1); - -// NODECHANGE: ADD an arg. 
void emitIns_R_R_I_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); -void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs); - -void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); - -void emitIns_S_S_R_R( - instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); - -void emitIns_R_R_S_S( - instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs); - -void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val); - void emitIns_R_C( instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs); @@ -200,33 +171,13 @@ void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg); -void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); - -void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, ssize_t offs, ssize_t val); - -void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg); - -void emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int instrCount); - -void emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber reg, int offs); - void emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); -// NODECHANGE: ADD a description of arguments "disp" void emitIns_R_AI(instruction ins, emitAttr attr, regNumber reg, ssize_t disp DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); -void emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs); - -void emitIns_R_ARR(instruction ins, emitAttr 
attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); - -void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp); - -void emitIns_R_ARX( - instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp); - enum EmitCallType { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index e9f127786b631..bae791f106393 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -424,7 +424,11 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s size = emitActualTypeSize(type); } +#ifdef TARGET_LOONGARCH64 + NYI_LOONGARCH64("inst_RV-----unimplemented/unused on LOONGARCH64 yet----"); +#else GetEmitter()->emitIns_R(ins, size, reg); +#endif } /***************************************************************************** @@ -647,6 +651,8 @@ void CodeGen::inst_RV_IV( * been made addressable). */ +#ifndef TARGET_LOONGARCH64 +// Now this is only used on xarch. void CodeGen::inst_TT(instruction ins, GenTree* tree, unsigned offs, int shfv, emitAttr size) { bool sizeInferred = false; @@ -747,6 +753,7 @@ void CodeGen::inst_TT(instruction ins, GenTree* tree, unsigned offs, int shfv, e assert(!"invalid address"); } } +#endif //------------------------------------------------------------------------ // inst_TT_RV: Generate a store of a lclVar @@ -903,7 +910,9 @@ void CodeGen::inst_RV_TT(instruction ins, // For LoongArch64-ABI, the float arg might be passed by integer register, // when there is no float register left but there is integer register(s) left. if (emitter::isFloatReg(reg)) + { assert((ins == INS_fld_d) || (ins == INS_fld_s)); + } else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) { ins = size == EA_4BYTE ? 
INS_ld_w : INS_ld_d; diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index 99cf4304a6200..e3da6728fd982 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -2,18 +2,14 @@ // The .NET Foundation licenses this file to you under the MIT license. /***************************************************************************** - * Loongarch64 instructions for JIT compiler + * LoongArch64 instructions for JIT compiler * - * id -- the enum name for the instruction - * nm -- textual name (for assembly dipslay) - * fp -- floating point instruction + * id -- the enum name for the instruction + * nm -- textual name (for assembly dipslay) + * fp -- floating point instruction * ld/st/cmp -- load/store/compare instruction - * fmt -- encoding format used by this instruction - * e1 -- encoding 1 - * e2 -- encoding 2 - * e3 -- encoding 3 - * e4 -- encoding 4 - * e5 -- encoding 5 + * fmt -- encoding format used by this instruction + * encode -- encoding 1 * ******************************************************************************/ @@ -35,459 +31,458 @@ // emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. // clang-format off -INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) +INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) +INST(nop , "nop", 0, 0, IF_LA, 0x03400000) + // INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. +INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) +INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) -INST(nop , "nop", 0, 0, IF_LA, 0x03400000) +INST(beq, "beq", 0, 0, IF_LA, 0x58000000) +INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) -////INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. 
-INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) -INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) - -INST(beq, "beq", 0, 0, IF_LA, 0x58000000) -INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) - -INST(blt, "blt", 0, 0, IF_LA, 0x60000000) -INST(bge, "bge", 0, 0, IF_LA, 0x64000000) -INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) -INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) +INST(blt, "blt", 0, 0, IF_LA, 0x60000000) +INST(bge, "bge", 0, 0, IF_LA, 0x64000000) +INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) +INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) ////R_I. -INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) -INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) +INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) +INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) ////I. -INST(b, "b", 0, 0, IF_LA, 0x50000000) -INST(bl, "bl", 0, 0, IF_LA, 0x54000000) +INST(b, "b", 0, 0, IF_LA, 0x50000000) +INST(bl, "bl", 0, 0, IF_LA, 0x54000000) -//////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: Begin //// the fllowing instructions will be used by emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// // enum name FP LD/ST FMT ENCODE - +// ////NOTE: mov must be the first one !!! more info to see emitter::emitInsMayWriteToGCReg(). -INST(mov, "mov", 0, 0, IF_LA, 0x03800000) - // mov rd,rj - //NOTE: On loongarch, usually it's name is move, but here for compatible using mov. - // In fact, mov is an alias commond, "ori rd,rj,0" -INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) - //dneg is a alias instruction. - //sub_d rd, zero, rk -INST(neg, "neg", 0, 0, IF_LA, 0x00110000) - //neg is a alias instruction. - //sub_w rd, zero, rk -INST(not, "not", 0, 0, IF_LA, 0x00140000) - //not is a alias instruction. 
- //nor rd, rj, zero - -// enum:id name FP LD/ST Formate ENCODE +/////////////////////////////////////////////////////////////////////////////////////////// +// mov rd,rj +// In fact, mov is an alias instruction, "ori rd,rj,0" +INST(mov, "mov", 0, 0, IF_LA, 0x03800000) + //dneg is a alias instruction. + //sub_d rd, zero, rk +INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) + //neg is a alias instruction. + //sub_w rd, zero, rk +INST(neg, "neg", 0, 0, IF_LA, 0x00110000) + //not is a alias instruction. + //nor rd, rj, zero +INST(not, "not", 0, 0, IF_LA, 0x00140000) + +// enum:id name FP LD/ST Formate ENCODE ////R_R_R. -INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) -INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) -INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) -INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) - -INST(and, "and", 0, 0, IF_LA, 0x00148000) -INST(or, "or", 0, 0, IF_LA, 0x00150000) -INST(nor, "nor", 0, 0, IF_LA, 0x00140000) -INST(xor, "xor", 0, 0, IF_LA, 0x00158000) -INST(andn, "andn", 0, 0, IF_LA, 0x00168000) -INST(orn, "orn", 0, 0, IF_LA, 0x00160000) - -INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) -INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) -INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) -INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) -INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) -INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) -INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) -INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) -INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) -INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) -INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) -INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) -INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) -INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) -INST(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) -INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) - -INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) -INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) -INST(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) 
-INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) -INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) -INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) -INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) -INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) - -INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) -INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) - -INST(slt, "slt", 0, 0, IF_LA, 0x00120000) -INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) - -INST(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) -INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) -INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) -INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) -INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) -INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) -INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) -INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) -INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) -INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) -INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) -INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) -INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) -INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) -INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) -INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) -INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) -INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) -INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) -INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) -INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) -INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) -INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) -INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) -INST(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) -INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) -INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) -INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) -INST(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) 
-INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) -INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) -INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) -INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) -INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) -INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) -INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) - -INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) -INST(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) -INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) -INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) -INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) -INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) -INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) -INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) +INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) +INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) +INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) +INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) + +INST(and, "and", 0, 0, IF_LA, 0x00148000) +INST(or, "or", 0, 0, IF_LA, 0x00150000) +INST(nor, "nor", 0, 0, IF_LA, 0x00140000) +INST(xor, "xor", 0, 0, IF_LA, 0x00158000) +INST(andn, "andn", 0, 0, IF_LA, 0x00168000) +INST(orn, "orn", 0, 0, IF_LA, 0x00160000) + +INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) +INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) +INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) +INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) +INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) +INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) +INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) +INST(mod_d, 
"mod.d", 0, 0, IF_LA, 0x00228000) +INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) + +INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) +INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) +INST(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) +INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) +INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) +INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) +INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) + +INST(slt, "slt", 0, 0, IF_LA, 0x00120000) +INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0, IF_LA, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) +INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) +INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) +INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) +INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) +INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0, IF_LA, 
0x38660000) +INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) +INST(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0, IF_LA, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) +INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) ////R_R_R_I. -INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) -INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) -INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) +INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) -INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) -INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) +INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) -INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) +INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) ////R_I. 
-INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) -INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) +INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) -INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) -INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) -INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) -INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) +INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) ////R_R. -INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) -INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) -INST(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) -INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) -INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) -INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) -INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) -INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) -INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) -INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) -INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) -INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) -INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) -INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) -INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) -INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) -INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) -INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) -INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) -INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) -INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) -INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) -INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) -INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) +INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) +INST(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) 
+INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) +INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) +INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) +INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) +INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) +INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) +INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) ////R_R_I_I. -INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) -INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) -INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) -INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) +INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) ////Load. 
-INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) -INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) -INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) -INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) -INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) -INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) -INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) - -INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) -INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) -INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) -INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) - -INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) -INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) -INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) -INST(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) -INST(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) -INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) -INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) - -INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) -INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) -INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) -INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) -INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) -INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) -INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) -INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) +INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) +INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) +INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) +INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) +INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) +INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) +INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) + +INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) +INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) +INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) +INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) + +INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) +INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) +INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) +INST(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) +INST(ldx_bu, 
"ldx.bu", 0, LD, IF_LA, 0x38200000) +INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) +INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) ////R_R_I. -INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) -INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) -INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) -INST(slti, "slti", 0, 0, IF_LA, 0x02000000) - -INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) -INST(andi, "andi", 0, 0, IF_LA, 0x03400000) -INST(ori, "ori", 0, 0, IF_LA, 0x03800000) -INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) - -INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) -INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) -INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) -INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) -INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) -INST(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) -INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) -INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) - -INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) - -INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) - +INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) +INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) +INST(slti, "slti", 0, 0, IF_LA, 0x02000000) + +INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) +INST(andi, "andi", 0, 0, IF_LA, 0x03400000) +INST(ori, "ori", 0, 0, IF_LA, 0x03800000) +INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) + +INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) +INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) +INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) 
+INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) +INST(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) +INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) + +INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) + +INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) +//////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). -//////////////////////////////////////////////// +// ////NOTE: End //// the above instructions will be used by emitter::emitInsMayWriteToGCReg(). -//////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////// + ////Store. -INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) -INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) -INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) -INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) - -INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) -INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) -INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) -INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) - -INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) -INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) -INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) -INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) -INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) -INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) -INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) -INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) -INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) -INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) -INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) -INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) - -INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) -INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) - -INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) -INST(break, "break", 0, 0, IF_LA, 0x002a0005) - -INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 
0x00010000) -INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) - -INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) -INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) +INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) +INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) +INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) +INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) + +INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) +INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) +INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) +INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) + +INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) +INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) +INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) +INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) +INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) +INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) +INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) +INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) + +INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) +INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) + +INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) +INST(break, "break", 0, 0, IF_LA, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) + +INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) +INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) ////Float instructions. ////R_R_R. 
-INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) -INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) -INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) -INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) -INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) -INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) -INST(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) -INST(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) - -INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) -INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) -INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) -INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) -INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) -INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) -INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) -INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) - -INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) -INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) - -INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) -INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) - -INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) -INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) -INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) -INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) - -INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) -INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) -INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) -INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) -INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) -INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) -INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) -INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) +INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) +INST(fdiv_d, 
"fdiv.d", 0, 0, IF_LA, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) +INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) + +INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) +INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) +INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) +INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) ////R_R_R_R. 
-INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) -INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) -INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) -INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) -INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) -INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) -INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) -INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) +INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) ////R_R. -INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) -INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) -INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) -INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) - -INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) -INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) -INST(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) -INST(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) -INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) -INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) -INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) -INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) -INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) -INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) - -INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) -INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) -INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) -INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) -INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) -INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) -INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) -INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) 
-INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) -INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) -INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) -INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) -INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) -INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) -INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) -INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) -INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) -INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) -INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) -INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) -INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) -INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) -INST(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) -INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) -INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) -INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) -INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) -INST(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) - -INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) -INST(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) - -INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) -INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) -INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) -INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) -INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) -INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) +INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) +INST(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0, 
IF_LA, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) +INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) +INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) +INST(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) +INST(fmov_d, 
"fmov.d", 0, 0, IF_LA, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) //// -INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) -INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) -INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) -INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) -INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) -INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) +INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) ////R_R_I. 
-INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) -INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) -INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) -INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) -INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) -INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) -INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) -INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) -INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) -INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) -INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) - -INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) -INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) -INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) -INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) -INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) -INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) -INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) -INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) -INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) -INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) -INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) - -INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) -INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) -INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) -INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) -INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) -INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) -INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) -INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) -INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) -INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) -INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) - -INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) -INST(fcmp_sun_s, "fcmp.sun.s", 
0, 0, IF_LA, 0x0c148000) -INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) -INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) -INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) -INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) -INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) -INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) -INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) -INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) -INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) +INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) +INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) 
+INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) +INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0, IF_LA, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) ////R_R_I. -INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) -INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) -INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) -INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) +INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) +INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) +INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) +INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) // clang-format on /*****************************************************************************/ diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 414b223ee89b2..3d47ed3322b27 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -812,7 +812,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (compFeatureArgSplit()) { // This does not affect the normal calling convention for LoongArch64!! 
- if (this->info.compIsVarArgs && argType == TYP_STRUCT) + if (this->info.compIsVarArgs && (argType == TYP_STRUCT)) { if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register @@ -885,53 +885,53 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - var_types arg1_Type = TYP_UNKNOWN; - var_types arg2_Type = TYP_UNKNOWN; + var_types arg1Type = TYP_UNKNOWN; + var_types arg2Type = TYP_UNKNOWN; if (floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) { assert(varTypeIsStruct(argType)); - int float_num = 0; + int floatNum = 0; if (floatFlags == STRUCT_FLOAT_FIELD_ONLY_ONE) { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); - float_num = 1; + floatNum = 1; - arg1_Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; - canPassArgInRegisters = varDscInfo->canEnreg(arg1_Type, 1); + arg1Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; + canPassArgInRegisters = varDscInfo->canEnreg(arg1Type, 1); } else if (floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) { - arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - float_num = 2; + arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + floatNum = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); } else if (floatFlags & STRUCT_FLOAT_FIELD_FIRST) { - float_num = 1; + floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_LONG : TYP_INT; + arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; } else if (floatFlags & STRUCT_FLOAT_FIELD_SECOND) { - float_num = 1; + floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; - arg2_Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } if (!canPassArgInRegisters) { - assert(float_num > 0); + assert(floatNum > 0); canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - arg1_Type = TYP_UNKNOWN; - arg2_Type = TYP_UNKNOWN; + arg1Type = TYP_UNKNOWN; + arg2Type = TYP_UNKNOWN; } } else @@ -947,7 +947,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (!canPassArgInRegisters && (cSlots > 1)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1_Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + arg1Type = canPassArgInRegisters ? 
TYP_I_IMPL : TYP_UNKNOWN; } #endif } @@ -980,9 +980,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - if (arg1_Type != TYP_UNKNOWN) + if (arg1Type != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1_Type, 1); + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1Type, 1); } else #endif // defined(TARGET_LOONGARCH64) @@ -1036,15 +1036,15 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #elif defined(TARGET_LOONGARCH64) if (argType == TYP_STRUCT) { - if (arg1_Type != TYP_UNKNOWN) + if (arg1Type != TYP_UNKNOWN) { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1_Type)); - varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1_Type) == 4 ? 1 : 0; - if (arg2_Type != TYP_UNKNOWN) + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1Type)); + varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1Type) == 4 ? 1 : 0; + if (arg2Type != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2_Type, 1); - varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2_Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2_Type) == 4 ? 1 : 0; + firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2Type, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2Type)); + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 
1 : 0; varDscInfo->hasMultiSlotStruct = true; } else if (cSlots > 1) @@ -1052,7 +1052,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); varDscInfo->hasMultiSlotStruct = true; - varDscInfo->setAllRegArgUsed(arg1_Type); + varDscInfo->setAllRegArgUsed(arg1Type); varDscInfo->stackArgSize += TARGET_POINTER_SIZE; } } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a39ebbcdbab7c..bc14541634ff5 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1080,7 +1080,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf bool isOnStack = (info->GetRegNum() == REG_STK); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Mark contained when we pass struct // GT_FIELD_LIST is always marked contained when it is generated if (type == TYP_STRUCT) @@ -1091,15 +1091,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf MakeSrcContained(arg, arg->AsObj()->Addr()); } } -#elif defined(TARGET_LOONGARCH64) - if (type == TYP_STRUCT) - { - arg->SetContained(); - if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR)) - { - MakeSrcContained(arg, arg->AsObj()->Addr()); - } - } #endif #if FEATURE_ARG_SPLIT @@ -1484,9 +1475,9 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #endif // TARGET_ARMARCH #if defined(TARGET_LOONGARCH64) - if (call->IsVarargs() /*|| comp->opts.compUseSoftFP*/) + if (call->IsVarargs()) { - // For vararg call or on armel, reg args should be all integer. + // For vararg call, reg args should be all integer. // Insert copies as needed to move float value to integer register. 
GenTree* newNode = LowerFloatArg(ppArg, info); if (newNode != nullptr) @@ -1522,7 +1513,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) //------------------------------------------------------------------------ -// LowerFloatArg: Lower float call arguments on the arm platform. +// LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform. // // Arguments: // arg - The arg node @@ -2853,8 +2844,8 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) { assert(cmp->gtGetOp2()->IsIntegralConst()); -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) // || defined(TARGET_LOONGARCH64) - ////TODO: add optimize for LoongArch64. +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + // TODO-LoongArch64: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); @@ -5784,7 +5775,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { -#if defined(TARGET_ARM64) //|| defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) // 64-bit MUL is more expensive than UMULL on ARM64. genTreeOps mulOper = simpleMul ? GT_MUL_LONG : GT_MULHI; #else diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 4196b23578f61..0054decb0b019 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -4,9 +4,9 @@ /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX -XX Lowering for LOONGARCH64 common code XX +XX Lowering for LOONGARCH64 common code XX XX XX -XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX +XX This encapsulates common logic for lowering trees for the LOONGARCH64 XX XX architectures. 
For a more detailed view of what is lowering, please XX XX take a look at Lower.cpp XX XX XX @@ -67,13 +67,12 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { case GT_ADD: - return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : ((-2048 <= immVal) && - (immVal <= 2047)); + return ((-2048 <= immVal) && (immVal <= 2047)); break; case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: - assert(!"unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("unimplemented on LOONGARCH yet"); break; case GT_EQ: @@ -122,37 +121,6 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) { assert(mul->OperIsMul()); - // if (comp->opts.OptimizationEnabled() && mul->OperIs(GT_MUL) && mul->IsValidLongMul()) - //{ - // GenTreeCast* op1 = mul->gtGetOp1()->AsCast(); - // GenTree* op2 = mul->gtGetOp2(); - - // mul->ClearOverflow(); - // mul->ClearUnsigned(); - // if (op1->IsUnsigned()) - // { - // mul->SetUnsigned(); - // } - - // mul->gtOp1 = op1->CastOp(); - // BlockRange().Remove(op1); - - // if (op2->OperIs(GT_CAST)) - // { - // mul->gtOp2 = op2->AsCast()->CastOp(); - // BlockRange().Remove(op2); - // } - // else - // { - // assert(op2->IsIntegralConst()); - // assert(FitsIn(op2->AsIntConCommon()->IntegralValue())); - - // op2->ChangeType(TYP_INT); - // } - - // mul->ChangeOper(GT_MUL_LONG); - //} - ContainCheckMul(mul); return mul->gtNext; @@ -322,7 +290,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (fill == 0) { src->SetContained(); - ; } else if (size >= REGSIZE_BYTES) { @@ -547,19 +514,7 @@ void Lowering::LowerRotate(GenTree* tree) // void Lowering::LowerSIMD(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - assert(simdNode->gtType != TYP_SIMD32); - - if (simdNode->TypeGet() == TYP_SIMD12) - { - // GT_SIMD node requiring to produce TYP_SIMD12 in fact - // produces a TYP_SIMD16 result - simdNode->gtType = TYP_SIMD16; - } - - ContainCheckSIMD(simdNode); -#endif + 
NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } #endif // FEATURE_SIMD @@ -572,60 +527,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - auto intrinsicID = node->gtHWIntrinsicId; - auto intrinsicInfo = HWIntrinsicInfo::lookup(node->gtHWIntrinsicId); - - // - // Lower unsupported Unsigned Compare Zero intrinsics to their trivial transformations - // - // LOONGARCH64 does not support most forms of compare zero for Unsigned values - // This is because some are non-sensical, and the rest are trivial transformations of other operators - // - if ((intrinsicInfo.flags & HWIntrinsicInfo::LowerCmpUZero) && varTypeIsUnsigned(node->gtSIMDBaseType)) - { - auto setAllVector = node->gtSIMDSize > 8 ? NI_LOONGARCH64_SIMD_SetAllVector128 : NI_LOONGARCH64_SIMD_SetAllVector64; - - auto origOp1 = node->gtOp.gtOp1; - - switch (intrinsicID) - { - case NI_LOONGARCH64_SIMD_GT_ZERO: - // Unsigned > 0 ==> !(Unsigned == 0) - node->gtOp.gtOp1 = - comp->gtNewSimdHWIntrinsicNode(node->TypeGet(), node->gtOp.gtOp1, NI_LOONGARCH64_SIMD_EQ_ZERO, - node->gtSIMDBaseType, node->gtSIMDSize); - node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_BitwiseNot; - BlockRange().InsertBefore(node, node->gtOp.gtOp1); - break; - case NI_LOONGARCH64_SIMD_LE_ZERO: - // Unsigned <= 0 ==> Unsigned == 0 - node->gtHWIntrinsicId = NI_LOONGARCH64_SIMD_EQ_ZERO; - break; - case NI_LOONGARCH64_SIMD_GE_ZERO: - case NI_LOONGARCH64_SIMD_LT_ZERO: - // Unsigned >= 0 ==> Always true - // Unsigned < 0 ==> Always false - node->gtHWIntrinsicId = setAllVector; - node->gtOp.gtOp1 = comp->gtNewLconNode((intrinsicID == NI_LOONGARCH64_SIMD_GE_ZERO) ? 
~0ULL : 0ULL); - BlockRange().InsertBefore(node, node->gtOp.gtOp1); - if ((origOp1->gtFlags & GTF_ALL_EFFECT) == 0) - { - BlockRange().Remove(origOp1, true); - } - else - { - origOp1->SetUnusedValue(); - } - break; - default: - assert(!"Unhandled LowerCmpUZero case"); - } - } - - ContainCheckHWIntrinsic(node); -#endif + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } //---------------------------------------------------------------------------------------------- @@ -641,55 +543,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // This check may end up modifying node->gtOp1 if it is a cast node that can be removed bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { - assert((node->gtHWIntrinsicId == NI_Vector64_Create) || (node->gtHWIntrinsicId == NI_Vector128_Create) || - (node->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe) || - (node->gtHWIntrinsicId == NI_Vector128_CreateScalarUnsafe) || - (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector64) || - (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector128) || - (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || - (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector128)); - assert(HWIntrinsicInfo::lookupNumArgs(node) == 1); - - GenTree* op1 = node->gtOp1; - GenTree* castOp = nullptr; - - if (varTypeIsIntegral(node->GetSimdBaseType()) && op1->OperIs(GT_CAST)) - { - // We will sometimes get a cast around a constant value (such as for - // certain long constants) which would block the below containment. - // So we will temporarily check what the cast is from instead so we - // can catch those cases as well. 
- - castOp = op1->AsCast()->CastOp(); - op1 = castOp; - } - - if (op1->IsCnsIntOrI()) - { - const ssize_t dataValue = op1->AsIntCon()->gtIconVal; - - if (comp->GetEmitter()->emitIns_valid_imm_for_movi(dataValue, emitActualTypeSize(node->GetSimdBaseType()))) - { - if (castOp != nullptr) - { - // We found a containable immediate under - // a cast, so remove the cast from the LIR. - - BlockRange().Remove(node->gtOp1); - node->gtOp1 = op1; - } - return true; - } - } - else if (op1->IsCnsFltOrDbl()) - { - assert(varTypeIsFloating(node->GetSimdBaseType())); - assert(castOp == nullptr); - - const double dataValue = op1->AsDblCon()->gtDconVal; - return comp->GetEmitter()->emitIns_valid_imm_for_fmov(dataValue); - } - + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); return false; } @@ -702,116 +556,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - - assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || - (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); - - assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType)); - assert(simdSize != 0); - assert(node->gtType == TYP_BOOL); - assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); - - // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): - // /--* op2 simd - // /--* op1 simd - // node = * HWINTRINSIC simd T op_Equality - - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - NamedIntrinsic cmpIntrinsic; - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - 
case TYP_USHORT: - case TYP_INT: - case TYP_UINT: - case TYP_FLOAT: - { - cmpIntrinsic = NI_AdvSimd_CompareEqual; - break; - } - - case TYP_LONG: - case TYP_ULONG: - case TYP_DOUBLE: - { - cmpIntrinsic = NI_AdvSimd_Arm64_CompareEqual; - break; - } - - default: - { - unreached(); - } - } - - GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, cmp); - LowerNode(cmp); - - if ((simdBaseType == TYP_FLOAT) && (simdSize == 12)) - { - // For TYP_SIMD12 we don't want the upper bits to participate in the comparison. So, we will insert all ones - // into those bits of the result, "as if" the upper bits are equal. Then if all lower bits are equal, we get the - // expected all-ones result, and will get the expected 0's only where there are non-matching bits. - - GenTree* idxCns = comp->gtNewIconNode(3, TYP_INT); - BlockRange().InsertAfter(cmp, idxCns); - - GenTree* insCns = comp->gtNewIconNode(-1, TYP_INT); - BlockRange().InsertAfter(idxCns, insCns); - - GenTree* tmp = comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, - CORINFO_TYPE_INT, simdSize); - BlockRange().InsertAfter(insCns, tmp); - LowerNode(tmp); - - cmp = tmp; - } - - GenTree* msk = - comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(cmp, msk); - LowerNode(msk); - - GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); - BlockRange().InsertAfter(msk, zroCns); - - GenTree* val = - comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(zroCns, val); - LowerNode(val); - - zroCns = comp->gtNewIconNode(0, TYP_INT); - BlockRange().InsertAfter(val, zroCns); - - node->ChangeOper(cmpOp); - - node->gtType = TYP_INT; - node->gtOp1 = val; - node->gtOp2 = zroCns; - - // The CompareEqual will set (condition is true) or clear (condition is false) all bits 
of the respective element - // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) - // So, we need to invert the condition from the operation since we compare against zero - - GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::NE : GenCondition::EQ; - GenTree* cc = LowerNodeCC(node, cmpCnd); - - node->gtType = TYP_VOID; - node->ClearUnusedValue(); - - LowerNode(node); + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } //---------------------------------------------------------------------------------------------- @@ -822,279 +567,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - var_types simdType = node->gtType; - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); - VectorConstant vecCns = {}; - - if ((simdSize == 8) && (simdType == TYP_DOUBLE)) - { - // TODO-Cleanup: Struct retyping means we have the wrong type here. We need to - // manually fix it up so the simdType checks below are correct. - simdType = TYP_SIMD8; - } - - assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType)); - assert(simdSize != 0); - - GenTreeArgList* argList = nullptr; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - // Spare GenTrees to be used for the lowering logic below - // Defined upfront to avoid naming conflicts, etc... 
- GenTree* idx = nullptr; - GenTree* tmp1 = nullptr; - GenTree* tmp2 = nullptr; - GenTree* tmp3 = nullptr; - - assert(op1 != nullptr); - - unsigned argCnt = 0; - unsigned cnsArgCnt = 0; - - if (op1->OperIsList()) - { - assert(op2 == nullptr); - - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) - { - if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - } - } - else - { - if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - - if (op2 != nullptr) - { - if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - } - else if (cnsArgCnt == 1) - { - // These intrinsics are meant to set the same value to every element - // so we'll just specially handle it here and copy it into the remaining - // indices. - - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) - { - HandleArgForHWIntrinsicCreate(op1, i, vecCns, simdBaseType); - } - } - } - assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); - - if ((argCnt == cnsArgCnt) && (argCnt == 1)) - { - GenTree* castOp = nullptr; - - if (varTypeIsIntegral(simdBaseType) && op1->OperIs(GT_CAST)) - { - // We will sometimes get a cast around a constant value (such as for - // certain long constants) which would block the below containment. - // So we will temporarily check what the cast is from instead so we - // can catch those cases as well. - - castOp = op1->AsCast()->CastOp(); - op1 = castOp; - } - - if (IsValidConstForMovImm(node)) - { - // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector - // intrinsic, which will itself mark the node as contained. - cnsArgCnt = 0; - - // Reacquire op1 as the above check may have removed a cast node and - // changed op1. 
- op1 = node->gtOp1; - } - } - - if (argCnt == cnsArgCnt) - { - if (op1->OperIsList()) - { - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) - { - BlockRange().Remove(argList->Current()); - } - } - else - { - BlockRange().Remove(op1); - - if (op2 != nullptr) - { - BlockRange().Remove(op2); - } - } - - assert((simdSize == 8) || (simdSize == 16)); - - if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8)) - { - // If we are a single constant or if all parts are the same, we might be able to optimize - // this even further for certain values, such as Zero or AllBitsSet. - - if (vecCns.i64[0] == 0) - { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero; - return; - } - else if (vecCns.i64[0] == -1) - { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet; - return; - } - } - - unsigned cnsSize = (simdSize == 12) ? 16 : simdSize; - unsigned cnsAlign = cnsSize; - var_types dataType = Compiler::getSIMDTypeForSize(simdSize); - - UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType); - CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum); - GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd, nullptr); - BlockRange().InsertBefore(node, clsVarAddr); - - node->ChangeOper(GT_IND); - node->gtOp1 = clsVarAddr; - - // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial - // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just - // insert the non-constant values which should still allow some gains. 
- - return; - } - else if (argCnt == 1) - { - // We have the following (where simd is simd8 or simd16): - // /--* op1 T - // node = * HWINTRINSIC simd T Create - - // We will be constructing the following parts: - // /--* op1 T - // node = * HWINTRINSIC simd T DuplicateToVector - - // This is roughly the following managed code: - // return AdvSimd.Arm64.DuplicateToVector(op1); - - if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) - { - node->gtHWIntrinsicId = - (simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 : NI_AdvSimd_Arm64_DuplicateToVector128; - } - else - { - node->gtHWIntrinsicId = - (simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128; - } - return; - } - - // We have the following (where simd is simd8 or simd16): - // /--* op1 T - // +--* ... T - // +--* opN T - // node = * HWINTRINSIC simd T Create - - if (op1->OperIsList()) - { - argList = op1->AsArgList(); - op1 = argList->Current(); - argList = argList->Rest(); - } - - // We will be constructing the following parts: - // /--* op1 T - // tmp1 = * HWINTRINSIC simd8 T CreateScalarUnsafe - // ... - - // This is roughly the following managed code: - // var tmp1 = Vector64.CreateScalarUnsafe(op1); - // ... - - NamedIntrinsic createScalarUnsafe = - (simdType == TYP_SIMD8) ? NI_Vector64_CreateScalarUnsafe : NI_Vector128_CreateScalarUnsafe; - - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, createScalarUnsafe, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op1, tmp1); - LowerNode(tmp1); - - unsigned N = 0; - GenTree* opN = nullptr; - - for (N = 1; N < argCnt - 1; N++) - { - // We will be constructing the following parts: - // ... - // idx = CNS_INT int N - // /--* tmp1 simd - // +--* idx int - // +--* opN T - // tmp1 = * HWINTRINSIC simd T Insert - // ... - - // This is roughly the following managed code: - // ... - // tmp1 = AdvSimd.Insert(tmp1, N, opN); - // ... 
- - opN = argList->Current(); - - idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertBefore(opN, idx); - - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(opN, tmp1); - LowerNode(tmp1); - - argList = argList->Rest(); - } - - assert(N == (argCnt - 1)); - - // We will be constructing the following parts: - // idx = CNS_INT int N - // /--* tmp1 simd - // +--* idx int - // +--* opN T - // node = * HWINTRINSIC simd T Insert - - // This is roughly the following managed code: - // ... - // tmp1 = AdvSimd.Insert(tmp1, N, opN); - // ... - - opN = (argCnt == 2) ? op2 : argList->Current(); - - idx = comp->gtNewIconNode(N, TYP_INT); - BlockRange().InsertBefore(opN, idx); - - node->gtOp1 = comp->gtNewArgList(tmp1, idx, opN); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AdvSimd_Insert; + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } //---------------------------------------------------------------------------------------------- @@ -1105,233 +578,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - var_types simdBaseType = node->GetSimdBaseType(); - unsigned simdSize = node->GetSimdSize(); - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); - - assert((intrinsicId == NI_Vector64_Dot) || (intrinsicId == NI_Vector128_Dot)); - assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType)); - assert(simdSize != 0); - - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - assert(op1 != nullptr); - assert(op2 != nullptr); - assert(!op1->OperIsList()); - - // Spare GenTrees to be used for the lowering logic below - // Defined upfront to avoid naming conflicts, etc... 
- GenTree* idx = nullptr; - GenTree* tmp1 = nullptr; - GenTree* tmp2 = nullptr; - - if (simdSize == 12) - { - assert(simdBaseType == TYP_FLOAT); - - // For 12 byte SIMD, we need to clear the upper 4 bytes: - // idx = CNS_INT int 0x03 - // tmp1 = * CNS_DLB float 0.0 - // /--* op1 simd16 - // +--* idx int - // +--* tmp1 simd16 - // op1 = * HWINTRINSIC simd16 T Insert - // ... - - // This is roughly the following managed code: - // op1 = AdvSimd.Insert(op1, 0x03, 0.0f); - // ... - - idx = comp->gtNewIconNode(0x03, TYP_INT); - BlockRange().InsertAfter(op1, idx); - - tmp1 = comp->gtNewZeroConNode(TYP_FLOAT); - BlockRange().InsertAfter(idx, tmp1); - LowerNode(tmp1); - - op1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, idx, tmp1, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp1, op1); - LowerNode(op1); - - idx = comp->gtNewIconNode(0x03, TYP_INT); - BlockRange().InsertAfter(op2, idx); - - tmp2 = comp->gtNewZeroConNode(TYP_FLOAT); - BlockRange().InsertAfter(idx, tmp2); - LowerNode(tmp2); - - op2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op2, idx, tmp2, NI_AdvSimd_Insert, simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp2, op2); - LowerNode(op2); - } - - // We will be constructing the following parts: - // ... - // /--* op1 simd16 - // +--* op2 simd16 - // tmp1 = * HWINTRINSIC simd16 T Multiply - // ... - - // This is roughly the following managed code: - // ... - // var tmp1 = AdvSimd.Multiply(op1, op2); - // ... - - NamedIntrinsic multiply = (simdBaseType == TYP_DOUBLE) ? NI_AdvSimd_Arm64_Multiply : NI_AdvSimd_Multiply; - assert(!varTypeIsLong(simdBaseType)); - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, op1, op2, multiply, simdBaseJitType, simdSize); - BlockRange().InsertBefore(node, tmp1); - LowerNode(tmp1); - - if (varTypeIsFloating(simdBaseType)) - { - // We will be constructing the following parts: - // ... 
- // /--* tmp1 simd16 - // * STORE_LCL_VAR simd16 - // tmp1 = LCL_VAR simd16 - // tmp2 = LCL_VAR simd16 - // ... - - // This is roughly the following managed code: - // ... - // var tmp2 = tmp1; - // ... - - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); - ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; - - tmp2 = comp->gtClone(tmp1); - BlockRange().InsertAfter(tmp1, tmp2); - - if (simdSize == 8) - { - assert(simdBaseType == TYP_FLOAT); - - // We will be constructing the following parts: - // ... - // /--* tmp1 simd8 - // +--* tmp2 simd8 - // tmp1 = * HWINTRINSIC simd8 T AddPairwise - // ... - - // This is roughly the following managed code: - // ... - // var tmp1 = AdvSimd.AddPairwise(tmp1, tmp2); - // ... - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_AddPairwise, simdBaseJitType, - simdSize); - BlockRange().InsertAfter(tmp2, tmp1); - LowerNode(tmp1); - } - else - { - assert((simdSize == 12) || (simdSize == 16)); - - // We will be constructing the following parts: - // ... - // /--* tmp1 simd16 - // +--* tmp2 simd16 - // tmp2 = * HWINTRINSIC simd16 T AddPairwise - // ... - - // This is roughly the following managed code: - // ... - // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); - // ... - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, simdBaseJitType, - simdSize); - BlockRange().InsertAfter(tmp2, tmp1); - LowerNode(tmp1); - - if (simdBaseType == TYP_FLOAT) - { - // Float needs an additional pairwise add to finish summing the parts - // The first will have summed e0 with e1 and e2 with e3 and then repeats that for the upper half - // So, we will have a vector that looks like this: - // < e0 + e1, e2 + e3, e0 + e1, e2 + e3> - // Doing a second horizontal add with itself will then give us - // e0 + e1 + e2 + e3 in all elements of the vector - - // We will be constructing the following parts: - // ... 
- // /--* tmp1 simd16 - // * STORE_LCL_VAR simd16 - // tmp1 = LCL_VAR simd16 - // tmp2 = LCL_VAR simd16 - // /--* tmp1 simd16 - // +--* tmp2 simd16 - // tmp2 = * HWINTRINSIC simd16 T AddPairwise - // ... - - // This is roughly the following managed code: - // ... - // var tmp2 = tmp1; - // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); - // ... - - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); - ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; - - tmp2 = comp->gtClone(tmp1); - BlockRange().InsertAfter(tmp1, tmp2); - - tmp1 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, tmp2, NI_AdvSimd_Arm64_AddPairwise, - simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp2, tmp1); - LowerNode(tmp1); - } - } - - tmp2 = tmp1; - } - else - { - assert(varTypeIsIntegral(simdBaseType)); - - // We will be constructing the following parts: - // ... - // /--* tmp1 simd16 - // tmp2 = * HWINTRINSIC simd16 T AddAcross - // ... - - // This is roughly the following managed code: - // ... - // var tmp2 = AdvSimd.Arm64.AddAcross(tmp1); - // ... - - tmp2 = comp->gtNewSimdAsHWIntrinsicNode(simdType, tmp1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, simdSize); - BlockRange().InsertAfter(tmp1, tmp2); - LowerNode(tmp2); - } - - // We will be constructing the following parts: - // ... - // /--* tmp2 simd16 - // node = * HWINTRINSIC simd16 T ToScalar - - // This is roughly the following managed code: - // ... - // return tmp2.ToScalar(); - - node->gtOp1 = tmp2; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = (simdSize == 8) ? 
NI_Vector64_ToScalar : NI_Vector128_ToScalar; - LowerNode(node); - - return; + NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 66745063b96dd..d70273386eacf 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -396,11 +396,13 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CMPXCHG: { + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; assert(dstCount == 1); - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { // For LOONGARCH exclusives requires a single internal register buildInternalIntRegisterDefForNode(tree); @@ -422,7 +424,7 @@ int LinearScan::BuildNode(GenTree* tree) // For LOONGARCH exclusives the lifetime of the comparand must be extended because // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { setDelayFree(comparandUse); } @@ -441,9 +443,12 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XADD: case GT_XCHG: { + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; +#if 0 if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { // GT_XCHG requires a single internal register; the others require two. @@ -458,6 +463,7 @@ int LinearScan::BuildNode(GenTree* tree) // for ldclral we need an internal register. 
buildInternalIntRegisterDefForNode(tree); } +#endif assert(!tree->gtGetOp1()->isContained()); RefPosition* op1Use = BuildUse(tree->gtGetOp1()); @@ -467,9 +473,9 @@ int LinearScan::BuildNode(GenTree* tree) op2Use = BuildUse(tree->gtGetOp2()); } - // For LOONGARCH exclusives the lifetime of the addr and data must be extended because + // For LOONGARCH64 exclusives the lifetime of the addr and data must be extended because // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) { // Internals may not collide with target if (dstCount == 1) diff --git a/src/coreclr/pal/inc/rt/ntimage.h b/src/coreclr/pal/inc/rt/ntimage.h index cd56b305aed48..e6970cca7cd38 100644 --- a/src/coreclr/pal/inc/rt/ntimage.h +++ b/src/coreclr/pal/inc/rt/ntimage.h @@ -240,6 +240,7 @@ typedef struct _IMAGE_FILE_HEADER { #define IMAGE_FILE_MACHINE_M32R 0x9041 // M32R little-endian #define IMAGE_FILE_MACHINE_ARM64 0xAA64 // ARM64 Little-Endian #define IMAGE_FILE_MACHINE_CEE 0xC0EE +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. // // Directory format. From df5b3d356bbf0767ca9500e4e6f1755863ec1831 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 24 Feb 2022 10:30:30 +0800 Subject: [PATCH 28/46] [LoongArch64] apply the format and fix compiling warning. --- src/coreclr/inc/crosscomp.h | 11 ++++------- src/coreclr/jit/codegencommon.cpp | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/coreclr/inc/crosscomp.h b/src/coreclr/inc/crosscomp.h index 63a48d0e4ceea..1a7fdb37b9c25 100644 --- a/src/coreclr/inc/crosscomp.h +++ b/src/coreclr/inc/crosscomp.h @@ -399,7 +399,7 @@ enum #define CONTEXT_UNWOUND_TO_CALL 0x20000000 -typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { +typedef struct DECLSPEC_ALIGN(8) _T_CONTEXT { // // Control flags. 
@@ -414,8 +414,8 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { DWORD64 Ra; DWORD64 Tp; DWORD64 Sp; - DWORD64 A0;//DWORD64 V0; - DWORD64 A1;//DWORD64 V1; + DWORD64 A0; + DWORD64 A1; DWORD64 A2; DWORD64 A3; DWORD64 A4; @@ -447,7 +447,7 @@ typedef struct DECLSPEC_ALIGN(16) _T_CONTEXT { // // Floating Point Registers // - //TODO: support the SIMD. + //TODO-LoongArch64: support the SIMD. DWORD64 F[32]; DWORD Fcsr; } T_CONTEXT, *PT_CONTEXT; @@ -469,7 +469,6 @@ typedef struct _T_RUNTIME_FUNCTION { }; } T_RUNTIME_FUNCTION, *PT_RUNTIME_FUNCTION; - // // Define exception dispatch context structure. // @@ -489,8 +488,6 @@ typedef struct _T_DISPATCHER_CONTEXT { PBYTE NonVolatileRegisters; } T_DISPATCHER_CONTEXT, *PT_DISPATCHER_CONTEXT; - - // // Nonvolatile context pointer record. // diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 5900d163d4c41..dbf711d70d404 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6029,7 +6029,7 @@ void CodeGen::genFnProlog() #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 if (!isOSRx64Root) { genPushCalleeSavedRegisters(); @@ -8128,7 +8128,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) assert(regCount == 2); int offset = genTypeSize(type); type = retTypeDesc.GetReturnRegType(1); - offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; + offset = (int)((unsigned int)offset < genTypeSize(type) ? genTypeSize(type) : offset); toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } From b912e84119a34383413da034218e07ddeebf9e71 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 25 Feb 2022 18:16:31 +0800 Subject: [PATCH 29/46] [LoongArch64] round 1 amend for reviewing by @kunalspathak. 
--- src/coreclr/jit/codegencommon.cpp | 4 +- src/coreclr/jit/codegenloongarch64.cpp | 14 +- src/coreclr/jit/emit.cpp | 29 +- src/coreclr/jit/emitloongarch64.cpp | 6 +- src/coreclr/jit/emitloongarch64.h | 10 +- src/coreclr/jit/importer.cpp | 8 +- src/coreclr/jit/lsra.cpp | 23 +- src/coreclr/jit/lsra.h | 15 +- src/coreclr/jit/lsrabuild.cpp | 6 +- src/coreclr/jit/lsraloongarch64.cpp | 491 ++++--------------------- src/coreclr/jit/registerloongarch64.h | 5 + 11 files changed, 131 insertions(+), 480 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index dbf711d70d404..c1c990f627c62 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6029,7 +6029,9 @@ void CodeGen::genFnProlog() #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) genPushCalleeSavedRegisters(initReg, &initRegZeroed); -#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 + +#else // !TARGET_ARM64 || !TARGET_LOONGARCH64 + if (!isOSRx64Root) { genPushCalleeSavedRegisters(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index cfc8b6587cd3e..af586a60e0715 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -21,11 +21,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -static short splitLow(int value) -{ - return (value & 0xffff); -} - // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { @@ -3951,8 +3946,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) GenTree* op1 = treeNode->AsOp()->gtOp1; var_types targetType = treeNode->TypeGet(); ssize_t expMask = (targetType == TYP_FLOAT) ? 0xFF : 0x7FF; // Bit mask to extract exponent. - ssize_t size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. - ssize_t pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. 
+ int size = (targetType == TYP_FLOAT) ? 8 : 11; // Bit size to extract exponent. + int pos = (targetType == TYP_FLOAT) ? 23 : 52; // Bit pos of exponent. emitter* emit = GetEmitter(); emitAttr attr = emitActualTypeSize(treeNode); @@ -6086,7 +6081,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // unsigned gcPtrCount; // The count of GC pointers in the struct unsigned srcSize; - bool isHfa; // gcPtrCount = treeNode->gtNumSlots; // Setup the srcSize and layout @@ -6570,7 +6564,7 @@ void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode) offset = genTypeSize(type); type = pRetTypeDesc->GetReturnRegType(1); reg = call->GetRegNumByIdx(1); - offset = offset < genTypeSize(type) ? genTypeSize(type) : offset; + offset = (offset < (int)genTypeSize(type)) ? genTypeSize(type) : offset; GetEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); } @@ -9489,7 +9483,7 @@ void CodeGen::genFnPrologCalleeRegArgs() } else { - for (int i = 0; i < regArgNum; i++) + for (unsigned i = 0; i < regArgNum; i++) { LclVarDsc* varDsc2 = compiler->lvaTable + regArgsVars[i]; var_types destMemType = varDsc2->GetRegisterType(); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c371a33f03cda..30c28972ac01d 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -4217,8 +4217,10 @@ void emitter::emitJumpDistBind() // offset of the jump UNATIVE_OFFSET dstOffs; NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded +#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET oldSize; UNATIVE_OFFSET sizeDif; +#endif #ifdef TARGET_XARCH assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL); @@ -4635,10 +4637,10 @@ void emitter::emitJumpDistBind() jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
- adjLJ += extra; - adjIG += extra; - emitTotalCodeSize += extra; + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjLJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; jmpIG->igFlags |= IGF_UPD_ISZ; isLinkingEnd_LA |= 0x1; } @@ -4711,7 +4713,7 @@ void emitter::emitJumpDistBind() } else { - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); extra = 8; } } @@ -4724,20 +4726,20 @@ void emitter::emitJumpDistBind() // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); } - else // if (ins == INS_b || ins == INS_bl) + else { assert(ins == INS_b || ins == INS_bl); // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); continue; } jmp->idInsOpt(INS_OPTS_JIRL); jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). - adjLJ += extra; - adjIG += extra; - emitTotalCodeSize += extra; + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjLJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; jmpIG->igFlags |= IGF_UPD_ISZ; isLinkingEnd_LA |= 0x1; } @@ -4884,8 +4886,6 @@ void emitter::emitJumpDistBind() // The size of IF_LARGEJMP/IF_LARGEADR/IF_LARGELDC are 8 or 12. // All other code size is 4. 
assert((sizeDif == 4) || (sizeDif == 8)); -#elif defined(TARGET_LOONGARCH64) - assert(sizeDif == 0); #else #error Unsupported or unset target architecture #endif @@ -6815,8 +6815,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #ifdef TARGET_LOONGARCH64 - // cp = cp - 4; - unsigned actualCodeSize = cp - codeBlock; + unsigned actualCodeSize = (unsigned)(cp - codeBlock); #endif #if EMIT_TRACK_STACK_DEPTH diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index dbf81a1e207fe..eb0b8659632bb 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2496,9 +2496,9 @@ void emitter::emitIns_Call(EmitCallType callType, assert(callType == EC_FUNC_TOKEN); assert(addr != NULL); - assert(((long)addr & 3) == 0); + assert((((size_t)addr) & 3) == 0); - addr = (void*)((long)addr + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 + addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0 id->idAddr()->iiaAddr = (BYTE*)addr; if (emitComp->opts.compReloc) @@ -2599,7 +2599,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t *(code_t*)dst = 0x1e00000e; - long addr = (long)id->idAddr()->iiaAddr; // get addr. + size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 2c9cbfd211bf1..cbeb66f7ded82 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -77,6 +77,11 @@ bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src bool IsRedundantLdStr( instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end. 
+/************************************************************************/ +/* Public inline informational methods */ +/************************************************************************/ + +public: // Returns true if 'value' is a legal signed immediate 12 bit encoding. static bool isValidSimm12(ssize_t value) { @@ -89,11 +94,6 @@ static bool isValidSimm20(ssize_t value) return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; -/************************************************************************/ -/* Public inline informational methods */ -/************************************************************************/ - -public: // Returns the number of bits used by the given 'size'. inline static unsigned getBitWidth(emitAttr size) { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 1ace0c67a39e3..e8658621055bb 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -13742,9 +13742,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef TARGET_LOONGARCH64 assertImp((genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op1->TypeGet()) == TYP_INT) || (genActualType(op2->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_INT) || - genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || - varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) || - varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType)); + (genActualType(op1->TypeGet()) == genActualType(op2->TypeGet())) || + (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || + (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); #else assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || @@ -14035,7 +14035,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) return; } - op1->gtType == TYP_INT; + op1->gtType = TYP_INT; impPushOnStack(op1, tiRetVal); break; diff --git 
a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c0a2feaef75ff..bcb834ab3e0f9 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1573,16 +1573,19 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) #endif // FEATURE_SIMD case TYP_STRUCT: -// TODO-1stClassStructs: support vars with GC pointers. The issue is that such -// vars will have `lvMustInit` set, because emitter has poor support for struct liveness, -// but if the variable is tracked the prolog generator would expect it to be in liveIn set, -// so an assert in `genFnProlog` will fire. + { + // TODO-1stClassStructs: support vars with GC pointers. The issue is that such + // vars will have `lvMustInit` set, because emitter has poor support for struct liveness, + // but if the variable is tracked the prolog generator would expect it to be in liveIn set, + // so an assert in `genFnProlog` will fire. + bool isRegCandidate = compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); #ifdef TARGET_LOONGARCH64 - return !genIsValidFloatReg(varDsc->GetOtherArgReg()) && compiler->compEnregStructLocals() && - !varDsc->HasGCPtr(); -#else - return compiler->compEnregStructLocals() && !varDsc->HasGCPtr(); + // The LoongArch64's ABI which the float args within a struct maybe passed by integer register + // when no float register left but free integer register. 
+ isRegCandidate &= !genIsValidFloatReg(varDsc->GetOtherArgReg()); #endif + return isRegCandidate; + } case TYP_UNDEF: case TYP_UNKNOWN: @@ -7728,7 +7731,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap; regMaskTP sameWriteRegs = RBM_NONE; @@ -7808,7 +7811,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) { sameToReg = REG_NA; } -#endif +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If the var is live only at those blocks connected by a split edge and not live-in at some of the // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 279ac5292ec0d..a916d10abaf44 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2216,17 +2216,12 @@ class RefPosition // no reg is allocated. unsigned char regOptional : 1; -// Used by RefTypeDef/Use positions of a multi-reg call node. -// Indicates the position of the register that this ref position refers to. -// The max bits needed is based on max value of MAX_RET_REG_COUNT value -// across all targets and that happens 4 on on Arm. Hence index value -// would be 0..MAX_RET_REG_COUNT-1. -#ifdef TARGET_LOONGARCH64 - // TODO for LOONGARCH64: should confirm for ArgSplit? - unsigned char multiRegIdx : 3; -#else // !TARGET_LOONGARCH64 + // Used by RefTypeDef/Use positions of a multi-reg call node. + // Indicates the position of the register that this ref position refers to. + // The max bits needed is based on max value of MAX_RET_REG_COUNT value + // across all targets and that happens 4 on on Arm. Hence index value + // would be 0..MAX_RET_REG_COUNT-1. 
unsigned char multiRegIdx : 2; -#endif // !TARGET_LOONGARCH64 // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b6bdb51884cc4..bba2b26ed5bba 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -594,6 +594,8 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); #ifndef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. assert((allRegs(theInterval->registerType) & mask) != 0); #endif } @@ -3946,8 +3948,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) srcCandidates = RBM_WRITE_BARRIER_SRC; #elif defined(TARGET_LOONGARCH64) - // the 'addr' goes into (REG_WRITE_BARRIER_DST) - // the 'src' goes into (REG_WRITE_BARRIER_SRC) + // the 'addr' goes into t6 (REG_WRITE_BARRIER_DST) + // the 'src' goes into t7 (REG_WRITE_BARRIER_SRC) // addrCandidates = RBM_WRITE_BARRIER_DST; srcCandidates = RBM_WRITE_BARRIER_SRC; diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index d70273386eacf..0c5d995953bdb 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -105,7 +105,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_STORE_LCL_VAR: if (tree->IsMultiRegLclVar() && isCandidateMultiRegLclVar(tree->AsLclVar())) { - dstCount = compiler->lvaGetDesc(tree->AsLclVar()->GetLclNum())->lvFieldCnt; + dstCount = compiler->lvaGetDesc(tree->AsLclVar())->lvFieldCnt; } FALLTHROUGH; @@ -143,19 +143,10 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CNS_DBL: { - GenTreeDblCon* dblConst = tree->AsDblCon(); - double constValue = dblConst->AsDblCon()->gtDconVal; - - if ((constValue == (double)(int)constValue) && 
(-2048 <= constValue) && (constValue <= 2047)) - { - // Directly encode constant to instructions. - } - else - { - // Reserve int to load constant from memory (IF_LARGELDC) - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } + // There is no instruction for loading float/double imm directly into FPR. + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); } FALLTHROUGH; @@ -263,13 +254,6 @@ int LinearScan::BuildNode(GenTree* tree) // everything is made explicit by adding casts. assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); } - - if (tree->gtOverflow()) - { - // Need a register different from target reg to check for overflow. - buildInternalIntRegisterDefForNode(tree); - setInternalRegsDelayFree = true; - } FALLTHROUGH; case GT_AND: @@ -295,27 +279,21 @@ int LinearScan::BuildNode(GenTree* tree) BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; - // case GT_MOD: - // case GT_UMOD: - // NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in LOONGARCH64"); - // assert(!"Shouldn't see an integer typed GT_MOD node in LOONGARCH64"); - // srcCount = 0; - // break; - case GT_MUL: + if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. 
+ buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } + FALLTHROUGH; + case GT_MOD: case GT_UMOD: case GT_DIV: case GT_MULHI: case GT_UDIV: { - if (emitActualTypeSize(tree) == EA_4BYTE) - { - // We need two registers: tmpRegOp1 and tmpRegOp2 - buildInternalIntRegisterDefForNode(tree); - buildInternalIntRegisterDefForNode(tree); - } - srcCount = BuildBinaryUses(tree->AsOp()); buildInternalRegisterUses(); assert(dstCount == 1); @@ -351,7 +329,7 @@ int LinearScan::BuildNode(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic()); + srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic(), &dstCount); break; #endif // FEATURE_HW_INTRINSICS @@ -375,13 +353,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GE: case GT_GT: case GT_JCMP: - if (!varTypeIsFloating(tree->gtGetOp1())) - { - // We need two registers: tmpRegOp1 and tmpRegOp2 - buildInternalIntRegisterDefForNode(tree); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } srcCount = BuildCmp(tree); break; @@ -444,55 +415,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XCHG: { NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - - assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1); - srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; - -#if 0 - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // GT_XCHG requires a single internal register; the others require two. - buildInternalIntRegisterDefForNode(tree); - if (tree->OperGet() != GT_XCHG) - { - buildInternalIntRegisterDefForNode(tree); - } - } - else if (tree->OperIs(GT_XAND)) - { - // for ldclral we need an internal register. 
- buildInternalIntRegisterDefForNode(tree); - } -#endif - - assert(!tree->gtGetOp1()->isContained()); - RefPosition* op1Use = BuildUse(tree->gtGetOp1()); - RefPosition* op2Use = nullptr; - if (!tree->gtGetOp2()->isContained()) - { - op2Use = BuildUse(tree->gtGetOp2()); - } - - // For LOONGARCH64 exclusives the lifetime of the addr and data must be extended because - // it may be used used multiple during retries - // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // Internals may not collide with target - if (dstCount == 1) - { - setDelayFree(op1Use); - if (op2Use != nullptr) - { - setDelayFree(op2Use); - } - setInternalRegsDelayFree = true; - } - buildInternalRegisterUses(); - } - if (dstCount == 1) - { - BuildDef(tree); - } + srcCount = 1; } break; @@ -501,7 +424,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildPutArgSplit(tree->AsPutArgSplit()); dstCount = tree->AsPutArgSplit()->gtNumRegs; break; -#endif // FEATURE _SPLIT_ARG +#endif // FEATURE_ARG_SPLIT case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); @@ -553,14 +476,14 @@ int LinearScan::BuildNode(GenTree* tree) { assert(dstCount == 1); - // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp): + // Need a variable number of temp regs (see genLclHeap() in codegenloongarch64.cpp): // Here '-' means don't care. // // Size? Init Memory? # temp regs // 0 - 0 - // const and <=6 ptr words - 0 + // const and <=UnrollLimit - 0 // const and 6 ptr words Yes 0 + // >UnrollLimit Yes 0 // Non-const Yes 0 // Non-const No 2 // @@ -580,11 +503,11 @@ int LinearScan::BuildNode(GenTree* tree) // This should also help in debugging as we can examine the original size specified with // localloc. sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); + size_t insCount = sizeVal / (REGSIZE_BYTES * 2); - // For small allocations up to 4 'stp' instructions (i.e. 
16 to 64 bytes of localloc) - // - if (stpCount <= 4) + // For small allocations up to 4 'st' instructions (i.e. 16 to 64 bytes of localloc) + // TODO-LoongArch64: maybe use paird-load/store or SIMD in future. + if (sizeVal <= (REGSIZE_BYTES * 2 * 4)) { // Need no internal registers } @@ -703,7 +626,7 @@ int LinearScan::BuildNode(GenTree* tree) // LOONGARCH64 does not support both Index and offset so we need an internal register buildInternalIntRegisterDefForNode(tree); } - else if (!((-2048 <= cns) && (cns <= 2047))) + else if (!emitter::isValidSimm12(cns)) { // This offset can't be contained in the add instruction, so we need an internal register buildInternalIntRegisterDefForNode(tree); @@ -792,197 +715,8 @@ int LinearScan::BuildNode(GenTree* tree) // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - int srcCount = 0; - // Only SIMDIntrinsicInit can be contained - if (simdTree->isContained()) - { - assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); - } - int dstCount = simdTree->IsValue() ? 1 : 0; - assert(dstCount == 1); - - bool buildUses = true; - - GenTree* op1 = simdTree->gtGetOp1(); - GenTree* op2 = simdTree->gtGetOp2(); - - switch (simdTree->gtSIMDIntrinsicID) - { - case SIMDIntrinsicInit: - case SIMDIntrinsicCast: - case SIMDIntrinsicSqrt: - case SIMDIntrinsicAbs: - case SIMDIntrinsicConvertToSingle: - case SIMDIntrinsicConvertToInt32: - case SIMDIntrinsicConvertToDouble: - case SIMDIntrinsicConvertToInt64: - case SIMDIntrinsicWidenLo: - case SIMDIntrinsicWidenHi: - // No special handling required. - break; - - case SIMDIntrinsicGetItem: - { - op1 = simdTree->gtGetOp1(); - op2 = simdTree->gtGetOp2(); - - // We have an object and an index, either of which may be contained. 
- bool setOp2DelayFree = false; - if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal())) - { - // If the index is not a constant and the object is not contained or is a local - // we will need a general purpose register to calculate the address - // internal register must not clobber input index - // TODO-Cleanup: An internal register will never clobber a source; this code actually - // ensures that the index (op2) doesn't interfere with the target. - buildInternalIntRegisterDefForNode(simdTree); - setOp2DelayFree = true; - } - srcCount += BuildOperandUses(op1); - if (!op2->isContained()) - { - RefPosition* op2Use = BuildUse(op2); - if (setOp2DelayFree) - { - setDelayFree(op2Use); - } - srcCount++; - } - - if (!op2->IsCnsIntOrI() && (!op1->isContained())) - { - // If vector is not already in memory (contained) and the index is not a constant, - // we will use the SIMD temp location to store the vector. - compiler->getSIMDInitTempVarNum(); - } - buildUses = false; - } - break; - - case SIMDIntrinsicAdd: - case SIMDIntrinsicSub: - case SIMDIntrinsicMul: - case SIMDIntrinsicDiv: - case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: - case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: - // No special handling required. 
- break; - - case SIMDIntrinsicSetX: - case SIMDIntrinsicSetY: - case SIMDIntrinsicSetZ: - case SIMDIntrinsicSetW: - case SIMDIntrinsicNarrow: - { - // Op1 will write to dst before Op2 is free - BuildUse(op1); - RefPosition* op2Use = BuildUse(op2); - setDelayFree(op2Use); - srcCount = 2; - buildUses = false; - break; - } - - case SIMDIntrinsicInitN: - { - var_types baseType = simdTree->gtSIMDBaseType; - srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType)); - if (varTypeIsFloating(simdTree->gtSIMDBaseType)) - { - // Need an internal register to stitch together all the values into a single vector in a SIMD reg. - buildInternalFloatRegisterDefForNode(simdTree); - } - - for (GenTree* operand : simdTree->Operands()) - { - assert(operand->TypeIs(baseType)); - assert(!operand->isContained()); - - BuildUse(operand); - } - - buildUses = false; - break; - } - - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - break; - - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - buildInternalFloatRegisterDefForNode(simdTree); - break; - - case SIMDIntrinsicDotProduct: - buildInternalFloatRegisterDefForNode(simdTree); - break; - - case SIMDIntrinsicSelect: - // TODO-LOONGARCH64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB - // bsl target register must be VC. Reserve a temp in case we need to shuffle things. - // This will require a different approach, as GenTreeSIMD has only two operands. 
- assert(!"SIMDIntrinsicSelect not yet supported"); - buildInternalFloatRegisterDefForNode(simdTree); - break; - - case SIMDIntrinsicInitArrayX: - case SIMDIntrinsicInitFixed: - case SIMDIntrinsicCopyToArray: - case SIMDIntrinsicCopyToArrayX: - case SIMDIntrinsicNone: - case SIMDIntrinsicGetCount: - case SIMDIntrinsicGetOne: - case SIMDIntrinsicGetZero: - case SIMDIntrinsicGetAllOnes: - case SIMDIntrinsicGetX: - case SIMDIntrinsicGetY: - case SIMDIntrinsicGetZ: - case SIMDIntrinsicGetW: - case SIMDIntrinsicInstEquals: - case SIMDIntrinsicHWAccel: - case SIMDIntrinsicWiden: - case SIMDIntrinsicInvalid: - assert(!"These intrinsics should not be seen during register allocation"); - __fallthrough; - - default: - noway_assert(!"Unimplemented SIMD node type."); - unreached(); - } - if (buildUses) - { - assert(!op1->OperIs(GT_LIST)); - assert(srcCount == 0); - srcCount = BuildOperandUses(op1); - if ((op2 != nullptr) && !op2->isContained()) - { - srcCount += BuildOperandUses(op2); - } - } - assert(internalCount <= MaxInternalCount); - buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(simdTree); - } - else - { - assert(dstCount == 0); - } - return srcCount; -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; } #endif // FEATURE_SIMD @@ -999,110 +733,8 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId; - int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); - - GenTree* op1 = intrinsicTree->gtGetOp1(); - GenTree* op2 = intrinsicTree->gtGetOp2(); - GenTree* op3 = nullptr; - int srcCount = 0; - - if ((op1 != nullptr) && op1->OperIsList()) - { - // op2 must be null, and there must be at least two more arguments. 
- assert(op2 == nullptr); - noway_assert(op1->AsArgList()->Rest() != nullptr); - noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr); - assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr); - op2 = op1->AsArgList()->Rest()->Current(); - op3 = op1->AsArgList()->Rest()->Rest()->Current(); - op1 = op1->AsArgList()->Current(); - } - - bool op2IsDelayFree = false; - bool op3IsDelayFree = false; - - // Create internal temps, and handle any other special requirements. - switch (HWIntrinsicInfo::lookup(intrinsicID).form) - { - case HWIntrinsicInfo::Sha1HashOp: - assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); - if (!op2->isContained()) - { - assert(!op3->isContained()); - op2IsDelayFree = true; - op3IsDelayFree = true; - setInternalRegsDelayFree = true; - } - buildInternalFloatRegisterDefForNode(intrinsicTree); - break; - case HWIntrinsicInfo::SimdTernaryRMWOp: - assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr)); - if (!op2->isContained()) - { - assert(!op3->isContained()); - op2IsDelayFree = true; - op3IsDelayFree = true; - } - break; - case HWIntrinsicInfo::Sha1RotateOp: - buildInternalFloatRegisterDefForNode(intrinsicTree); - break; - - case HWIntrinsicInfo::SimdExtractOp: - case HWIntrinsicInfo::SimdInsertOp: - if (!op2->isContained()) - { - // We need a temp to create a switch table - buildInternalIntRegisterDefForNode(intrinsicTree); - } - break; - - default: - break; - } - - // Next, build uses - if (numArgs > 3) - { - srcCount = 0; - assert(!op2IsDelayFree && !op3IsDelayFree); - assert(op1->OperIs(GT_LIST)); - { - for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) - { - srcCount += BuildOperandUses(list->Current()); - } - } - assert(srcCount == numArgs); - } - else - { - if (op1 != nullptr) - { - srcCount += BuildOperandUses(op1); - if (op2 != nullptr) - { - srcCount += (op2IsDelayFree) ? 
BuildDelayFreeUses(op2) : BuildOperandUses(op2); - if (op3 != nullptr) - { - srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); - } - } - } - } - buildInternalRegisterUses(); - - // Now defs - if (intrinsicTree->IsValue()) - { - BuildDef(intrinsicTree); - } - - return srcCount; -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + return 0; } #endif @@ -1141,12 +773,17 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) // LOONGARCH does not support both Index and offset so we need an internal register buildInternalIntRegisterDefForNode(indirTree); } - else if (!((-2048 <= cns) && (cns <= 2047))) + else if (!emitter::isValidSimm12(cns)) { // This offset can't be contained in the ldr/str instruction, so we need an internal register buildInternalIntRegisterDefForNode(indirTree); } } + else if (addr->OperGet() == GT_CLS_VAR_ADDR) + { + // Reserve int to load constant from memory (IF_LARGELDC) + buildInternalIntRegisterDefForNode(indirTree); + } } #ifdef FEATURE_SIMD @@ -1224,14 +861,23 @@ int LinearScan::BuildCall(GenTreeCall* call) // computed into a register. if (call->IsFastTailCall()) { - // Fast tail call - make sure that call target is always computed in T9(LOONGARCH64) - // so that epilog sequence can generate "jr t9" to achieve fast tail call. + // Fast tail call - make sure that call target is always computed in T4(LOONGARCH64) + // so that epilog sequence can generate "jirl t4" to achieve fast tail call. ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; } } else if (call->IsR2ROrVirtualStubRelativeIndir()) { - buildInternalIntRegisterDefForNode(call); + // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM + // and will load call address into the temp register from this register. 
+ regMaskTP candidates = RBM_NONE; + if (call->IsFastTailCall()) + { + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(candidates != RBM_NONE); + } + + buildInternalIntRegisterDefForNode(call, candidates); } RegisterType registerType = call->TypeGet(); @@ -1291,6 +937,9 @@ int LinearScan::BuildCall(GenTreeCall* call) { #ifdef DEBUG assert(use.GetNode()->OperIs(GT_PUTARG_REG)); + assert(use.GetNode()->GetRegNum() == argReg); + // Update argReg for the next putarg_reg (if any) + argReg = genRegArgNext(argReg); #endif BuildUse(use.GetNode(), genRegMask(use.GetNode()->GetRegNum())); srcCount++; @@ -1320,6 +969,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } } +#ifdef DEBUG // Now, count stack args // Note that these need to be computed into a register, but then // they're just stored to the stack - so the reg doesn't @@ -1334,10 +984,8 @@ int LinearScan::BuildCall(GenTreeCall* call) // Skip arguments that have been moved to the Late Arg list if ((arg->gtFlags & GTF_LATE_ARG) == 0) { -#ifdef DEBUG fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); assert(curArgTabEntry != nullptr); -#endif #if FEATURE_ARG_SPLIT // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they // define registers used by the call. @@ -1353,6 +1001,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } } } +#endif // DEBUG // If it is a fast tail call, it is already preferenced to use IP0. // Therefore, no need set src candidates on call tgt again. @@ -1415,7 +1064,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) } else { - // We can use a ldp/stp sequence so we need two internal registers for LOONGARCH64; one for ARM. + // We can use a ld/st sequence so we need two internal registers for LOONGARCH64. 
buildInternalIntRegisterDefForNode(argNode); if (putArgChild->OperGet() == GT_OBJ) @@ -1476,14 +1125,13 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; - regMaskTP argMaskArr[MAX_REG_ARG] = {RBM_NONE}; - - for (unsigned i = 0; i < dstCount; i++) + regNumber argReg = argNode->GetRegNum(); + regMaskTP argMask = RBM_NONE; + for (unsigned i = 0; i < argNode->gtNumRegs; i++) { - argMaskArr[i] = genRegMask(argNode->GetRegNumByIdx(i)); - argMask |= argMaskArr[i]; + regNumber thisArgReg = (regNumber)((unsigned)argReg + i); + argMask |= genRegMask(thisArgReg); + argNode->SetRegNumByIdx(thisArgReg, i); } if (putArgChild->OperGet() == GT_FIELD_LIST) @@ -1506,9 +1154,16 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Consume all the registers, setting the appropriate register mask for the ones that // go into registers. 
- // (sourceRegCount < argNode->gtNumRegs) - BuildUse(node, argMaskArr[sourceRegCount], 0); - sourceRegCount++; + for (unsigned regIndex = 0; regIndex < 1; regIndex++) + { + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) + { + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + } + sourceRegCount++; + BuildUse(node, sourceMask, regIndex); + } } srcCount += sourceRegCount; assert(putArgChild->isContained()); @@ -1518,7 +1173,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) assert(putArgChild->TypeGet() == TYP_STRUCT); assert(putArgChild->OperGet() == GT_OBJ); - // We can use a ldr/str sequence so we need an internal register + // We can use a ld/st sequence so we need an internal register buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); GenTree* objChild = putArgChild->gtGetOp1(); @@ -1536,11 +1191,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) assert(putArgChild->isContained()); } buildInternalRegisterUses(); - assert((argMask != RBM_NONE) && ((int)genCountBits(argMask) == dstCount)); - for (int i = 0; i < dstCount; i++) - { - BuildDef(argNode, argMaskArr[i], i); - } + BuildDefs(argNode, dstCount, argMask); return srcCount; } #endif // FEATURE_ARG_SPLIT @@ -1611,7 +1262,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (size >= 2 * REGSIZE_BYTES) { - // We will use ldp/stp to reduce code size and improve performance + // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance // so we need to reserve an extra internal register buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); } diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 5519b0639de4c..2962798473df0 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -52,6 +52,11 @@ REGDEF(S8, 31, 0x80000000, "s8" ) //NOTE for LoongArch64: // The `REG_R21` which 
alias `REG_X0` is specially reserved !!! // It can be used only by manully and should be very careful!!! +// e.g. right now LoongArch64's backend-codegen/emit, there is usually +// needed an extra register for some cases, like +// constructing a large imm or offset, saving some intermediate result +// of the overflowing check and integer-comparing result. +// Using the specially reserved register may be more efficient. REGALIAS(R21, X0) #define FBASE 32 From c40d0b89a198d555e354bbc2707a5538612f970c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Mon, 28 Feb 2022 16:10:24 +0800 Subject: [PATCH 30/46] [LoongArch64] merge fast-tail-call from main. --- src/coreclr/jit/codegenloongarch64.cpp | 429 +++++++++++++++---------- src/coreclr/jit/lsraloongarch64.cpp | 7 +- src/coreclr/jit/targetloongarch64.h | 3 - 3 files changed, 255 insertions(+), 184 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index af586a60e0715..6ab13a7c9198b 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1450,7 +1450,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) addrInfo.addr = nullptr; addrInfo.accessType = IAT_VALUE; - if (jmpEpilog && lastNode->gtOper == GT_JMP) + if (jmpEpilog && (lastNode->gtOper == GT_JMP)) { methHnd = (CORINFO_METHOD_HANDLE)lastNode->AsVal()->gtVal1; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo); @@ -1558,13 +1558,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) else { genPopCalleeSavedRegisters(true); - // Fast tail call. - // Call target = REG_FASTTAILCALL_TARGET - // https://github.com/dotnet/coreclr/issues/4827 - // Do we need a special encoding for stack walker like rex.w prefix for x64? - - // TODO-LOONGARCH64: whether the relative address is enough for optimize?
- GetEmitter()->emitIns_R_R_I(INS_jirl, emitTypeSize(TYP_I_IMPL), REG_R0, REG_FASTTAILCALL_TARGET, 0); + genCallInstruction(jmpNode->AsCall()); } #endif // FEATURE_FASTTAILCALL } @@ -5652,7 +5646,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) #endif // FEATURE_ARG_SPLIT case GT_CALL: - genCallInstruction(treeNode->AsCall()); + genCall(treeNode->AsCall()); break; case GT_MEMORYBARRIER: @@ -6799,8 +6793,6 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) // void CodeGen::genCodeForShift(GenTree* tree) { - // var_types targetType = tree->TypeGet(); - // genTreeOps oper = tree->OperGet(); instruction ins = genGetInsForOper(tree); emitAttr size = emitActualTypeSize(tree); @@ -7286,17 +7278,10 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst } //------------------------------------------------------------------------ -// genCallInstruction: Produce code for a GT_CALL node +// genCall: Produce code for a GT_CALL node // -void CodeGen::genCallInstruction(GenTreeCall* call) +void CodeGen::genCall(GenTreeCall* call) { - gtCallTypes callType = (gtCallTypes)call->gtCallType; - - DebugInfo di; - - // all virtuals should have been expanded into a control expression - assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); - // Consume all the arg regs for (GenTreeCall::Use& use : call->LateArgs()) { @@ -7321,23 +7306,22 @@ void CodeGen::genCallInstruction(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); + inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), + /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); + + argReg = genRegArgNext(argReg); } } -#if FEATURE_ARG_SPLIT else if (curArgTabEntry->IsSplit()) { - assert(curArgTabEntry->numRegs >= 1); - genConsumeArgSplitStruct(argNode->AsPutArgSplit()); + NYI("unimplemented on LOONGARCH64 yet"); } -#endif // FEATURE_ARG_SPLIT else { regNumber argReg = 
curArgTabEntry->GetRegNum(); genConsumeReg(argNode); - if (argNode->GetRegNum() != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->GetRegNum()); - } + inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ true, argReg, argNode->GetRegNum(), /* canSkip */ true, + emitActualTypeSize(TYP_I_IMPL)); } } @@ -7346,54 +7330,39 @@ void CodeGen::genCallInstruction(GenTreeCall* call) { const regNumber regThis = genGetThisArgReg(call); - // Ditto as genCodeForNullCheck GetEmitter()->emitIns_R_R_I(INS_ld_w, EA_4BYTE, REG_R0, regThis, 0); } - // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper - // method. - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = call->gtControlExpr; - if (callType == CT_INDIRECT) - { - assert(target == nullptr); - target = call->gtCallAddr; - methHnd = nullptr; - } - else - { - methHnd = call->gtCallMethHnd; - } - - CORINFO_SIG_INFO* sigInfo = nullptr; -#ifdef DEBUG - // Pass the call signature information down into the emitter so the emitter can associate - // native call sites with the signatures they were generated from. - if (callType != CT_HELPER) - { - sigInfo = call->callSig; - } -#endif // DEBUG - - // If fast tail call, then we are done. In this case we setup the args (both reg args - // and stack args in incoming arg area) and call target. Epilog sequence would - // generate "br ". + // If fast tail call, then we are done here, we just have to load the call + // target into the right registers. We ensure in RA that target is loaded + // into a volatile register that won't be restored by epilog sequence. if (call->IsFastTailCall()) { - // Don't support fast tail calling JIT helpers - assert(callType != CT_HELPER); + GenTree* target = getCallTarget(call, nullptr); if (target != nullptr) { // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. 
genConsumeReg(target); + } +#ifdef FEATURE_READYTORUN + else if (call->IsR2ROrVirtualStubRelativeIndir()) + { + assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || + ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); + assert(call->gtControlExpr == nullptr); - // Use REG_FASTTAILCALL_TARGET on LOONGARCH64 as the call target register. - if (target->GetRegNum() != REG_FASTTAILCALL_TARGET) - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_4BYTE, REG_FASTTAILCALL_TARGET, target->GetRegNum(), 0); - } + regNumber tmpReg = call->GetSingleTempReg(); + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == tmpReg); + + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // We will use this again when emitting the jump in genCallInstruction in the epilog + call->gtRsvdRegs |= genRegMask(tmpReg); } +#endif return; } @@ -7407,6 +7376,94 @@ void CodeGen::genCallInstruction(GenTreeCall* call) genDefineTempLabel(genCreateTempLabel()); } + genCallInstruction(call); + + // for pinvoke/intrinsic/tailcalls we may have needed to get the address of + // a label. In case it is indirect with CFG enabled make sure we do not get + // the address after the validation but only after the actual call that + // comes after. + if (genPendingCallLabel && !call->IsHelperCall(compiler, CORINFO_HELP_VALIDATE_INDIRECT_CALL)) + { + genDefineInlineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + +#ifdef DEBUG + // We should not have GC pointers in killed registers live around the call. + // GC info for arg registers were cleared when consuming arg nodes above + // and LSRA should ensure it for other trashed registers. 
+ regMaskTP killMask = RBM_CALLEE_TRASH; + if (call->IsHelperCall()) + { + CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd); + killMask = compiler->compHelperCallKillSet(helpFunc); + } + + assert((gcInfo.gcRegGCrefSetCur & killMask) == 0); + assert((gcInfo.gcRegByrefSetCur & killMask) == 0); +#endif + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + inst_Mov(regType, allocatedReg, returnReg, /* canSkip */ true); + } + } + else + { + if (varTypeUsesFloatArgReg(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->GetRegNum() != returnReg) + { + inst_Mov(returnType, call->GetRegNum(), returnReg, /* canSkip */ false); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +//------------------------------------------------------------------------ +// genCallInstruction - Generate instructions necessary to transfer control to the call. +// +// Arguments: +// call - the GT_CALL node +// +// Remaks: +// For tailcalls this function will generate a jump. 
+// +void CodeGen::genCallInstruction(GenTreeCall* call) +{ // Determine return value size(s). const ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); emitAttr retSize = EA_PTRSIZE; @@ -7431,7 +7488,8 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } - // We need to propagate the IL offset information to the call instruction, so we can emit + DebugInfo di; + // We need to propagate the debug information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. @@ -7440,153 +7498,168 @@ void CodeGen::genCallInstruction(GenTreeCall* call) (void)compiler->genCallSite2DebugInfoMap->Lookup(call, &di); } + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (call->gtCallType != CT_HELPER) + { + sigInfo = call->callSig; + } + + if (call->IsFastTailCall()) + { + regMaskTP trashedByEpilog = RBM_CALLEE_SAVED; + + // The epilog may use and trash REG_GSCOOKIE_TMP_0/1. Make sure we have no + // non-standard args that may be trash if this is a tailcall. 
+ if (compiler->getNeedsGSSecurityCookie()) + { + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_0); + trashedByEpilog |= genRegMask(REG_GSCOOKIE_TMP_1); + } + + for (unsigned i = 0; i < call->fgArgInfo->ArgCount(); i++) + { + fgArgTabEntry* entry = call->fgArgInfo->GetArgEntry(i); + for (unsigned j = 0; j < entry->numRegs; j++) + { + regNumber reg = entry->GetRegNum(j); + if ((trashedByEpilog & genRegMask(reg)) != 0) + { + JITDUMP("Tail call node:\n"); + DISPTREE(call); + JITDUMP("Register used: %s\n", getRegName(reg)); + assert(!"Argument to tailcall may be trashed by epilog"); + } + } + } + } +#endif // DEBUG + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = getCallTarget(call, &methHnd); + if (target != nullptr) { // A call target can not be a contained indirection assert(!target->isContainedIndir()); - genConsumeReg(target); + // For fast tailcall we have already consumed the target. We ensure in + // RA that the target was allocated into a volatile register that will + // not be messed up by epilog sequence. + if (!call->IsFastTailCall()) + { + genConsumeReg(target); + } // We have already generated code for gtControlExpr evaluating it into a register. // We just need to emit "call reg" in this case. 
// assert(genIsValidIntReg(target->GetRegNum())); - genEmitCall(emitter::EC_INDIR_R, methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, target->GetRegNum(), + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + target->GetRegNum(), call->IsFastTailCall()); - } - else if (call->IsR2ROrVirtualStubRelativeIndir()) - { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); -#ifdef FEATURE_READYTORUN_COMPILER - assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || - ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); -#endif // FEATURE_READYTORUN_COMPILER - assert(call->gtControlExpr == nullptr); - assert(!call->IsTailCall()); - - regNumber tmpReg = call->GetSingleTempReg(); - GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, REG_R2R_INDIRECT_PARAM); - - // We have now generated code for gtControlExpr evaluating it into `tmpReg`. - // We just need to emit "call tmpReg" in this case. 
- // - assert(genIsValidIntReg(tmpReg)); - - genEmitCall(emitter::EC_INDIR_R, methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, tmpReg, call->IsFastTailCall()); + // clang-format on } else { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); - - void* addr = nullptr; -#ifdef FEATURE_READYTORUN_COMPILER - if (call->gtEntryPoint.addr != NULL) + // If we have no target and this is a call with indirection cell then + // we do an optimization where we load the call address directly from + // the indirection cell instead of duplicating the tree. In BuildCall + // we ensure that get an extra register for the purpose. Note that for + // CFG the call might have changed to + // CORINFO_HELP_DISPATCH_INDIRECT_CALL in which case we still have the + // indirection cell but we should not try to optimize. + regNumber callThroughIndirReg = REG_NA; + if (!call->IsHelperCall(compiler, CORINFO_HELP_DISPATCH_INDIRECT_CALL)) { - assert(call->gtEntryPoint.accessType == IAT_VALUE); - addr = call->gtEntryPoint.addr; + callThroughIndirReg = getCallIndirectionCellReg(call); } - else -#endif // FEATURE_READYTORUN_COMPILER - if (callType == CT_HELPER) - { - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); - noway_assert(helperNum != CORINFO_HELP_UNDEF); - void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); - assert(pAddr == nullptr); - } - else + if (callThroughIndirReg != REG_NA) { - // Direct call to a non-virtual user function. - addr = call->gtDirectCallAddress; - } + assert(call->IsR2ROrVirtualStubRelativeIndir()); + regNumber targetAddrReg = call->GetSingleTempReg(); + // For fast tailcalls we have already loaded the call target when processing the call node. 
+ if (!call->IsFastTailCall()) + { + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), targetAddrReg, + callThroughIndirReg); + } + else + { + // Register where we save call address in should not be overridden by epilog. + assert((targetAddrReg & (RBM_INT_CALLEE_TRASH & ~RBM_RA)) == targetAddrReg); + } - assert(addr != nullptr); + // We have now generated code loading the target address from the indirection cell into `targetAddrReg`. + // We just need to emit "bl targetAddrReg" in this case. + // + assert(genIsValidIntReg(targetAddrReg)); - // Non-virtual direct call to known addresses - { - genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), di, REG_R21, + // clang-format off + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, // addr + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + targetAddrReg, call->IsFastTailCall()); - } - } - - // if it was a pinvoke we may have needed to get the address of a label - if (genPendingCallLabel) - { - genDefineInlineTempLabel(genPendingCallLabel); - genPendingCallLabel = nullptr; - } - - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? 
- // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; - - var_types returnType = call->TypeGet(); - if (returnType != TYP_VOID) - { - regNumber returnReg; - - if (call->HasMultiRegRetVal()) - { - assert(pRetTypeDesc != nullptr); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - - // If regs allocated to call node are different from ABI return - // regs in which the call has returned its result, move the result - // to regs allocated to call node. - for (unsigned i = 0; i < regCount; ++i) - { - var_types regType = pRetTypeDesc->GetReturnRegType(i); - returnReg = pRetTypeDesc->GetABIReturnReg(i); - regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } - } + // clang-format on } else { - if (varTypeUsesFloatArgReg(returnType)) + // Generate a direct call to a non-virtual user defined or helper method + assert(call->gtCallType == CT_HELPER || call->gtCallType == CT_USER_FUNC); + + void* addr = nullptr; +#ifdef FEATURE_READYTORUN + if (call->gtEntryPoint.addr != NULL) { - returnReg = REG_FLOATRET; + assert(call->gtEntryPoint.accessType == IAT_VALUE); + addr = call->gtEntryPoint.addr; } else +#endif // FEATURE_READYTORUN + if (call->gtCallType == CT_HELPER) { - returnReg = REG_INTRET; - } + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); - if (call->GetRegNum() != returnReg) + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + assert(pAddr == nullptr); + } + else { - { - inst_RV_RV(ins_Copy(returnType), call->GetRegNum(), returnReg, 
returnType); - } + // Direct call to a non-virtual user function. + addr = call->gtDirectCallAddress; } - } - genProduceReg(call); - } + assert(addr != nullptr); - // If there is nothing next, that means the result is thrown away, so this value is not live. - // However, for minopts or debuggable code, we keep it live to support managed return value debugging. - if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) - { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); + // clang-format off + genEmitCall(emitter::EC_FUNC_TOKEN, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + addr, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + di, + REG_NA, + call->IsFastTailCall()); + // clang-format on + } } } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 0c5d995953bdb..76416c597d82b 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -861,9 +861,10 @@ int LinearScan::BuildCall(GenTreeCall* call) // computed into a register. if (call->IsFastTailCall()) { - // Fast tail call - make sure that call target is always computed in T4(LOONGARCH64) - // so that epilog sequence can generate "jirl t4" to achieve fast tail call. - ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; + // Fast tail call - make sure that call target is always computed in volatile registers + // that will not be overridden by epilog sequence. 
+ ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(ctrlExprCandidates != RBM_NONE); } } else if (call->IsR2ROrVirtualStubRelativeIndir()) diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 8e74ea0334a9c..2106d3f2bc0b1 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -80,9 +80,6 @@ #define REG_DEFAULT_HELPER_CALL_TARGET REG_T2 #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_T2 - #define REG_FASTTAILCALL_TARGET REG_T4 // Target register for fast tail call - #define RBM_FASTTAILCALL_TARGET RBM_T4 - #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH) #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH) #define RBM_ALLDOUBLE RBM_ALLFLOAT From 20de75f0a7624ba3169d1764f6379a50898c69f1 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 26 Feb 2022 20:30:37 +0800 Subject: [PATCH 31/46] [LoongArch64] temp commit for windows compiling error. --- src/coreclr/inc/palclr.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/inc/palclr.h b/src/coreclr/inc/palclr.h index 2ab9c62c3e844..40fe2d1d3a2d1 100644 --- a/src/coreclr/inc/palclr.h +++ b/src/coreclr/inc/palclr.h @@ -606,4 +606,8 @@ #include "palclr_win.h" +#ifndef IMAGE_FILE_MACHINE_LOONGARCH64 +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 // LOONGARCH64. +#endif + #endif // defined(HOST_WINDOWS) From d86e6b466b37dc44a6fca39e1038fca71b525d8d Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 1 Mar 2022 10:30:27 +0800 Subject: [PATCH 32/46] [LoongArch64] amend format for reviewing. 
--- src/coreclr/jit/emit.h | 18 ++-- src/coreclr/jit/lowerloongarch64.cpp | 114 +------------------------- src/coreclr/jit/lsra.cpp | 8 ++ src/coreclr/jit/lsrabuild.cpp | 5 +- src/coreclr/jit/lsraloongarch64.cpp | 58 ++----------- src/coreclr/jit/registerloongarch64.h | 2 +- 6 files changed, 32 insertions(+), 173 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5008f79e680f9..126c74bdbefa7 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -607,9 +607,7 @@ class emitter static_assert_no_msg(IF_COUNT <= 128); insFormat _idInsFmt : 7; #elif defined(TARGET_LOONGARCH64) - unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. If not enough, please use the - // _idInsCount. - // unsigned _idInsCount : 5; // the instruction(s) count of this instrDesc described. + unsigned _idCodeSize : 5; // the instruction(s) size of this instrDesc described. #else static_assert_no_msg(IF_COUNT <= 256); insFormat _idInsFmt : 8; @@ -644,7 +642,7 @@ class emitter { } #else - insFormat idInsFmt() const + insFormat idInsFmt() const { return _idInsFmt; } @@ -1044,17 +1042,19 @@ class emitter #elif defined(TARGET_LOONGARCH64) unsigned idCodeSize() const { - return _idCodeSize; //_idInsCount; + return _idCodeSize; } void idCodeSize(unsigned sz) { - assert(sz < 32); + // LoongArch64's instrDesc is not always meaning only one instruction. + // e.g. the `emitter::emitIns_I_la` for emitting the immediates. + assert(sz <= 16); _idCodeSize = sz; } #endif // TARGET_LOONGARCH64 emitAttr idOpSize() - { // NOTE: not used for LOONGARCH64. 
+ { return emitDecodeSize(_idOpSize); } void idOpSize(emitAttr opsz) @@ -1889,8 +1889,8 @@ class emitter #endif // !defined(HOST_64BIT) #ifdef TARGET_LOONGARCH64 - unsigned int emitCounts_INS_OPTS_J; // INS_OPTS_J -#endif // defined(TARGET_LOONGARCH64) + unsigned int emitCounts_INS_OPTS_J; +#endif // TARGET_LOONGARCH64 size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 0054decb0b019..13d6cb3469dd9 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -827,61 +827,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - switch (simdNode->gtSIMDIntrinsicID) - { - GenTree* op1; - GenTree* op2; - - case SIMDIntrinsicInit: - op1 = simdNode->gtOp.gtOp1; - if (op1->IsIntegralConst(0)) - { - MakeSrcContained(simdNode, op1); - } - break; - - case SIMDIntrinsicInitArray: - // We have an array and an index, which may be contained. - CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); - break; - - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - // TODO-LOONGARCH64-CQ Support containing 0 - break; - - case SIMDIntrinsicGetItem: - { - // This implements get_Item method. The sources are: - // - the source SIMD struct - // - index (which element to get) - // The result is baseType of SIMD struct. - op1 = simdNode->gtOp.gtOp1; - op2 = simdNode->gtOp.gtOp2; - - // If the index is a constant, mark it as contained. 
- if (op2->IsCnsIntOrI()) - { - MakeSrcContained(simdNode, op2); - } - - if (IsContainableMemoryOp(op1)) - { - MakeSrcContained(simdNode, op1); - if (op1->OperGet() == GT_IND) - { - op1->AsIndir()->Addr()->ClearContained(); - } - } - break; - } - - default: - break; - } -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); } #endif // FEATURE_SIMD @@ -894,63 +840,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - assert(!"unimplemented on LOONGARCH yet"); -#if 0 - GenTreeArgList* argList = nullptr; - GenTree* op1 = node->gtOp.gtOp1; - GenTree* op2 = node->gtOp.gtOp2; - - if (op1->OperIs(GT_LIST)) - { - argList = op1->AsArgList(); - op1 = argList->Current(); - op2 = argList->Rest()->Current(); - } - - switch (HWIntrinsicInfo::lookup(node->gtHWIntrinsicId).form) - { - case HWIntrinsicInfo::SimdExtractOp: - if (op2->IsCnsIntOrI()) - { - MakeSrcContained(node, op2); - } - break; - - case HWIntrinsicInfo::SimdInsertOp: - if (op2->IsCnsIntOrI()) - { - MakeSrcContained(node, op2); - -#if 0 - // This is currently not supported downstream. The following (at least) need to be modifed: - // GenTree::isContainableHWIntrinsic() needs to handle this. 
- // CodeGen::genConsumRegs() - // - GenTree* op3 = argList->Rest()->Rest()->Current(); - - // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov - // VX[a] = VY[b] - // In C# this would naturally be expressed by - // Insert(VX, a, Extract(VY, b)) - // If both a & b are immediate constants contain the extract/getItem so that we can emit - // the single instruction mov Vx[a], Vy[b] - if (op3->OperIs(GT_HWIntrinsic) && (op3->AsHWIntrinsic()->gtHWIntrinsicId == NI_LOONGARCH64_SIMD_GetItem)) - { - ContainCheckHWIntrinsic(op3->AsHWIntrinsic()); - - if (op3->gtOp.gtOp2->isContained()) - { - MakeSrcContained(node, op3); - } - } -#endif - } - break; - - default: - break; - } -#endif + NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index bcb834ab3e0f9..6a42d72203b33 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2663,10 +2663,18 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* RegisterType regType = currentInterval->registerType; regMaskTP candidates = refPosition->registerAssignment; #ifdef TARGET_LOONGARCH64 + // The LoongArch64's ABI which the float args maybe passed by integer register + // when no float register left but free integer register. 
if ((candidates & allRegs(regType)) != RBM_NONE) + { return regType; + } else + { + assert((regType == TYP_DOUBLE) || (regType == TYP_FLOAT)); + assert((candidates & allRegs(TYP_I_IMPL)) != RBM_NONE); return TYP_I_IMPL; + } #else assert((candidates & allRegs(regType)) != RBM_NONE); return regType; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index bba2b26ed5bba..0ae460498883d 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -593,9 +593,12 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, regNumber physicalReg = genRegNumFromMask(mask); RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); -#ifndef TARGET_LOONGARCH64 +#ifdef TARGET_LOONGARCH64 // The LoongArch64's ABI which the float args maybe passed by integer register // when no float register left but free integer register. + assert((regType(theInterval->registerType) == FloatRegisterType) || + (allRegs(theInterval->registerType) & mask) != 0); +#else assert((allRegs(theInterval->registerType) & mask) != 0); #endif } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 76416c597d82b..207c79864470e 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -45,7 +45,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int LinearScan::BuildNode(GenTree* tree) { assert(!tree->isContained()); - int srcCount; + int srcCount = 0; int dstCount = 0; regMaskTP dstCandidates = RBM_NONE; regMaskTP killMask = RBM_NONE; @@ -264,7 +264,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RSZ: case GT_ROR: srcCount = BuildBinaryUses(tree->AsOp()); - buildInternalRegisterUses(); assert(dstCount == 1); BuildDef(tree); break; @@ -368,43 +367,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CMPXCHG: { NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - - 
GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); - srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; - assert(dstCount == 1); - - // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // For LOONGARCH exclusives requires a single internal register - buildInternalIntRegisterDefForNode(tree); - } - - // For LOONGARCH exclusives the lifetime of the addr and data must be extended because - // it may be used multiple during retries - - // For LOONGARCH atomic cas the lifetime of the addr and data must be extended to prevent - // them being reused as the target register which must be destroyed early - - RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->gtOpLocation); - setDelayFree(locationUse); - RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->gtOpValue); - setDelayFree(valueUse); - if (!cmpXchgNode->gtOpComparand->isContained()) - { - RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->gtOpComparand); - - // For LOONGARCH exclusives the lifetime of the comparand must be extended because - // it may be used used multiple during retries - // if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - setDelayFree(comparandUse); - } - } - - // Internals may not collide with target - setInternalRegsDelayFree = true; - buildInternalRegisterUses(); - BuildDef(tree); } break; @@ -415,7 +377,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_XCHG: { NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); - srcCount = 1; } break; @@ -502,8 +463,7 @@ int LinearScan::BuildNode(GenTree* tree) // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. // This should also help in debugging as we can examine the original size specified with // localloc. - sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t insCount = sizeVal / (REGSIZE_BYTES * 2); + sizeVal = AlignUp(sizeVal, STACK_ALIGN); // For small allocations up to 4 'st' instructions (i.e. 
16 to 64 bytes of localloc) // TODO-LoongArch64: maybe use paird-load/store or SIMD in future. @@ -1067,6 +1027,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) { // We can use a ld/st sequence so we need two internal registers for LOONGARCH64. buildInternalIntRegisterDefForNode(argNode); + buildInternalIntRegisterDefForNode(argNode); if (putArgChild->OperGet() == GT_OBJ) { @@ -1155,16 +1116,13 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Consume all the registers, setting the appropriate register mask for the ones that // go into registers. - for (unsigned regIndex = 0; regIndex < 1; regIndex++) + regMaskTP sourceMask = RBM_NONE; + if (sourceRegCount < argNode->gtNumRegs) { - regMaskTP sourceMask = RBM_NONE; - if (sourceRegCount < argNode->gtNumRegs) - { - sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); - } - sourceRegCount++; - BuildUse(node, sourceMask, regIndex); + sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); } + sourceRegCount++; + BuildUse(node, sourceMask, 0); } srcCount += sourceRegCount; assert(putArgChild->isContained()); diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index 2962798473df0..b58b7757b41d2 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -53,7 +53,7 @@ REGDEF(S8, 31, 0x80000000, "s8" ) // The `REG_R21` which alias `REG_X0` is specially reserved !!! // It can be used only by manully and should be very careful!!! // e.g. right now LoongArch64's backend-codegen/emit, there is usually -// needed a extra regitster for some case liking +// a need for an extra register for cases like // constructing a large imm or offset, saving some intermediate result // of the overflowing check and integer-comparing result. // Using the a specially reserved register maybe more efficient. 
From a235523cda2b7f5e5f43ce4995442dd1acaffe86 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 4 Mar 2022 17:47:21 +0800 Subject: [PATCH 33/46] [LoongArch64] amend the coding for LA-ABI's flags. --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/jit/morph.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 278f6466aca6a..15df62dadb9b6 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -937,7 +937,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, { uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { howToReturnStruct = SPK_PrimitiveType; useType = (structSize > 4) ? TYP_DOUBLE : TYP_FLOAT; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e8658621055bb..32d821142acbc 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11389,7 +11389,7 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; op1->gtType = TYP_LONG; } - else if (op1->gtOper == GT_CNS_INT) + else *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); #else op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? 
TYP_U_IMPL : TYP_I_IMPL); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 7cb1109f4afef..b15be80826c44 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -891,7 +891,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { assert(varTypeIsStruct(argType)); int floatNum = 0; - if (floatFlags == STRUCT_FLOAT_FIELD_ONLY_ONE) + if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0e348b24befb5..e5c2a413aadb5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3518,7 +3518,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else intArgRegNum += size; } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { structBaseType = structSize == 8 ? TYP_DOUBLE : TYP_FLOAT; fltArgRegNum += 1; From 9cc28ade034c215123b08f8eb0876e3d49338465 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Tue, 15 Mar 2022 15:51:53 +0800 Subject: [PATCH 34/46] [LoongArch64] amend some missed CRs. 
--- src/coreclr/jit/codegenloongarch64.cpp | 276 ++++++++++++++++++++----- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/lclvars.cpp | 5 +- src/coreclr/jit/lsraloongarch64.cpp | 42 +++- src/coreclr/jit/morph.cpp | 111 ++-------- src/coreclr/jit/targetloongarch64.h | 2 +- 6 files changed, 281 insertions(+), 157 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 6ab13a7c9198b..07c876496da57 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -234,7 +234,9 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, instruction ins = INS_st_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fst_d; + } if (spDelta != 0) { @@ -281,7 +283,9 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum instruction ins = INS_st_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fst_d; + } if (spDelta != 0) { @@ -330,7 +334,9 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, instruction ins = INS_ld_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fld_d; + } if (spDelta != 0) { @@ -378,7 +384,9 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg instruction ins = INS_ld_d; if (genIsValidFloatReg(reg1)) + { ins = INS_fld_d; + } if (spDelta != 0) { @@ -452,6 +460,7 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack* reg } } } + if (!isPairSave) { regStack->Push(RegPair(reg1)); @@ -1034,7 +1043,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #ifdef DEBUG if (compiler->opts.disAsm) + { printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } #endif int offset = 0; @@ -1166,7 +1177,9 @@ void CodeGen::genFuncletEpilog() { #ifdef DEBUG if (verbose) + { printf("*************** In genFuncletEpilog()\n"); + } #endif ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); @@ -1190,7 +1203,9 @@ void CodeGen::genFuncletEpilog() #ifdef DEBUG if 
(compiler->opts.disAsm) + { printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType); + } #endif regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat; @@ -1292,7 +1307,9 @@ void CodeGen::genFuncletEpilog() void CodeGen::genCaptureFuncletPrologEpilogInfo() { if (!compiler->ehAnyFunclets()) + { return; + } assert(isFramePointerUsed()); @@ -1312,10 +1329,14 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsPlusPSPSize; if (!IsSaveFpRaWithAllCalleeSavedRegisters()) + { saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize /* -2*8*/; + } else + { saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; + } if (compiler->info.compIsVarArgs) { @@ -1413,7 +1434,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) { #ifdef DEBUG if (verbose) + { printf("*************** In genFnEpilog()\n"); + } #endif // DEBUG ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); @@ -1424,7 +1447,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) #ifdef DEBUG if (compiler->opts.dspCode) + { printf("\n__epilog:\n"); + } if (verbose) { @@ -2239,20 +2264,26 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) if (varTypeUsesFloatArgReg(treeNode)) { if (attr == EA_4BYTE) + { GetEmitter()->emitIns_R_R(INS_fmov_s, attr, retReg, op1->GetRegNum()); + } else + { GetEmitter()->emitIns_R_R(INS_fmov_d, attr, retReg, op1->GetRegNum()); + } } else { - if (attr == EA_4BYTE) // && op1->OperIs(GT_LCL_VAR) && - // (emitActualTypeSize(compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvType) == - // EA_8BYTE)) + if (attr == EA_4BYTE) { if (treeNode->gtFlags & GTF_UNSIGNED) + { GetEmitter()->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, retReg, op1->GetRegNum(), 31, 0); + } else + { GetEmitter()->emitIns_R_R_I(INS_slli_w, attr, retReg, op1->GetRegNum(), 0); + } } else GetEmitter()->emitIns_R_R_I(INS_ori, attr, retReg, 
op1->GetRegNum(), 0); @@ -2368,26 +2399,29 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time size_t stpCount = amount / (REGSIZE_BYTES * 2); - if (stpCount <= 4) + if (compiler->info.compInitMem) { - imm = -16 * stpCount; - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); - - imm = -imm; - while (stpCount != 0) + if (stpCount <= 4) { - imm -= 8; - emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); - imm -= 8; - emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); - stpCount -= 1; - } + imm = -16 * stpCount; + emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); - lastTouchDelta = 0; + imm = -imm; + while (stpCount != 0) + { + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + imm -= 8; + emit->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, REG_SPBASE, imm); + stpCount -= 1; + } - goto ALLOC_DONE; + lastTouchDelta = 0; + + goto ALLOC_DONE; + } } - else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <= + else if (amount < compiler->eeGetPageSize()) // must be < not <= { // Since the size is less than a page, simply adjust the SP value. 
// The SP might already be in the guard page, so we must touch it BEFORE @@ -2398,14 +2432,14 @@ void CodeGen::genLclHeap(GenTree* tree) lastTouchDelta = amount; imm = -(ssize_t)amount; - assert(-8192 <= imm && imm < 0); - if (-2048 <= imm && imm < 0) + if (isValidSimm12(imm)) + { emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + } else { - emit->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); - emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm >> 3); - emit->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + emit->emitIns_I_la(EA_PTRSIZE, rsGetRsvdReg(), amount); + emit->emitIns_R_R_R(INS_sub_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rsGetRsvdReg()); } goto ALLOC_DONE; @@ -2749,7 +2783,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_w; } else + { ins = INS_mod_w; + } } else { @@ -2758,7 +2794,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_d; } else + { ins = INS_mod_d; + } } emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); @@ -2786,7 +2824,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_wu; } else + { ins = INS_mod_wu; + } // TODO-LOONGARCH64: here is just for signed-extension ? 
emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, Reg1, Reg1, 0); @@ -2799,7 +2839,9 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree) ins = INS_div_du; } else + { ins = INS_mod_du; + } } emit->emitIns_R_R_R(ins, size, tree->GetRegNum(), Reg1, divisorReg); @@ -3188,15 +3230,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) static inline bool isImmed(GenTree* treeNode) { - if (treeNode->gtGetOp1()->isContainedIntOrIImmed()) + assert(treeNode->OperIsBinary()); + + if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) { return true; } - else if (treeNode->OperIsBinary()) - { - if (treeNode->gtGetOp2()->isContainedIntOrIImmed()) - return true; - } return false; } @@ -3218,33 +3257,53 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { case GT_ADD: if (attr == EA_4BYTE) + { ins = INS_fadd_s; + } else + { ins = INS_fadd_d; + } break; case GT_SUB: if (attr == EA_4BYTE) + { ins = INS_fsub_s; + } else + { ins = INS_fsub_d; + } break; case GT_MUL: if (attr == EA_4BYTE) + { ins = INS_fmul_s; + } else + { ins = INS_fmul_d; + } break; case GT_DIV: if (attr == EA_4BYTE) + { ins = INS_fdiv_s; + } else + { ins = INS_fdiv_d; + } break; case GT_NEG: if (attr == EA_4BYTE) + { ins = INS_fneg_s; + } else + { ins = INS_fneg_d; + } break; default: @@ -3286,7 +3345,6 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) break; case GT_SUB: - isImm = isImmed(treeNode); if ((attr == EA_8BYTE) || (attr == EA_BYREF)) { ins = INS_sub_d; @@ -3410,16 +3468,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. if (attr == EA_4BYTE) + { ins = INS_slli_w; + } else + { ins = INS_slli_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_sll_w; + } else + { ins = INS_sll_d; + } } break; @@ -3429,16 +3495,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. 
if (attr == EA_4BYTE) + { ins = INS_srli_w; + } else + { ins = INS_srli_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_srl_w; + } else + { ins = INS_srl_d; + } } break; @@ -3448,16 +3522,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. if (attr == EA_4BYTE) + { ins = INS_srai_w; + } else + { ins = INS_srai_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_sra_w; + } else + { ins = INS_sra_d; + } } break; @@ -3467,16 +3549,24 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) { // it's better to check sa. if (attr == EA_4BYTE) + { ins = INS_rotri_w; + } else + { ins = INS_rotri_d; + } } else { if (attr == EA_4BYTE) + { ins = INS_rotr_w; + } else + { ins = INS_rotr_d; + } } break; @@ -3874,17 +3964,6 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) } } - //{ - // GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, tmpReg, REG_R0); - - // GetEmitter()->emitIns_R_R_I(srcType == TYP_DOUBLE ? INS_c_olt_d : INS_c_olt_s, EA_8BYTE, op1->GetRegNum(), - // tmpReg, 2); - // GetEmitter()->emitIns_I_I(INS_bc1f, EA_PTRSIZE, 2, 4 << 2); - - // GetEmitter()->emitIns_R_R_I(INS_ori*/, EA_PTRSIZE, treeNode->GetRegNum(), REG_R0, 0); - // GetEmitter()->emitIns_I(INS_b, EA_PTRSIZE, srcType == TYP_DOUBLE ? 14 << 2 : 13 << 2); - //} - if (srcType == TYP_DOUBLE) GetEmitter()->emitIns_R_R_I(INS_lu52i_d, EA_8BYTE, REG_R21, REG_R0, imm >> 8); else @@ -4017,44 +4096,68 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) if (IsUnordered) { if (tree->OperIs(GT_LT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_LE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_EQ)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cueq_s : INS_fcmp_cueq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_NE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cune_s : INS_fcmp_cune_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cult_s : INS_fcmp_cult_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cule_s : INS_fcmp_cule_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } } else { if (tree->OperIs(GT_LT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_LE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_EQ)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_ceq_s : INS_fcmp_ceq_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_NE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_cne_s : INS_fcmp_cne_d, cmpSize, op1->GetRegNum(), op2->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GT)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? INS_fcmp_clt_s : INS_fcmp_clt_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } else if (tree->OperIs(GT_GE)) + { emit->emitIns_R_R_I(cmpSize == EA_4BYTE ? 
INS_fcmp_cle_s : INS_fcmp_cle_d, cmpSize, op2->GetRegNum(), op1->GetRegNum(), 1 /*cc*/); + } } emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); @@ -4544,7 +4647,9 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } if (IsEq && (ins != INS_invalid)) + { emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0); // 5-bits; + } else if (ins != INS_invalid) { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -4592,9 +4697,13 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { case EA_4BYTE: if (IsUnsigned || ((op2->gtFlags | op1->gtFlags) & GTF_UNSIGNED)) + { imm = static_cast(imm); + } else + { imm = static_cast(imm); + } break; case EA_8BYTE: break; @@ -4719,7 +4828,9 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) } if (IsEq) + { emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, SaveCcResultReg); // 5-bits; + } else { jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; @@ -5303,13 +5414,14 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // function that does a probe, which will in turn call this function. 
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); - if (-2048 <= spDelta && spDelta < 0) + if (isValidSimm12(spDelta)) + { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); + } else { - GetEmitter()->emitIns_R_R_I(INS_srai_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta >> 3); - GetEmitter()->emitIns_R_R_I(INS_slli_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, 3); + GetEmitter()->emitIns_I_la(EA_PTRSIZE, REG_R21, spDelta); + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, REG_R21); } } @@ -5450,7 +5562,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_CNS_INT: if ((targetType == TYP_DOUBLE) || (targetType == TYP_FLOAT)) + { treeNode->gtOper = GT_CNS_DBL; + } FALLTHROUGH; case GT_CNS_DBL: genSetRegToConst(targetReg, targetType, treeNode); @@ -5590,14 +5704,18 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForCompare(treeNode->AsOp()); } else if (!treeNode->gtNext) + { genCodeForJumpTrue(treeNode->AsOp()); + } else if (!treeNode->gtNext->OperIs(GT_JTRUE)) { GenTree* treeNode_next = treeNode->gtNext; while (treeNode_next) { if (treeNode_next->OperIs(GT_JTRUE)) + { break; + } treeNode_next = treeNode_next->gtNext; }; assert(treeNode_next->OperIs(GT_JTRUE)); @@ -5853,7 +5971,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while // executing GS cookie check will not collect the object pointed to by REG_INTRET (A0). if (!pushReg && (compiler->info.compRetNativeType == TYP_REF)) + { gcInfo.gcRegGCrefSetCur |= RBM_INTRET; + } // We need two temporary registers, to load the GS cookie values and compare them. We can't use // any argument registers if 'pushReg' is true (meaning we have a JMP call). 
They should be @@ -6227,16 +6347,18 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) if (targetReg != op1->GetRegNum()) { if (emitter::isFloatReg(targetReg) == emitter::isFloatReg(op1->GetRegNum())) + { inst_RV_RV(ins_Copy(targetType), targetReg, op1->GetRegNum(), targetType); -#if 1 + } else if (emitter::isFloatReg(targetReg)) + { GetEmitter()->emitIns_R_R(INS_movgr2fr_d, EA_8BYTE, targetReg, op1->GetRegNum()); - else // if (!emitter::isFloatReg(targetReg)) + } + else { assert(!emitter::isFloatReg(targetReg)); GetEmitter()->emitIns_R_R(INS_movfr2gr_d, EA_8BYTE, targetReg, op1->GetRegNum()); } -#endif } genProduceReg(tree); } @@ -6925,7 +7047,7 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) // } // // sltu REG_R21, index, REG_R21 - // bne REG_21, zero, RngChkExit + // bne REG_R21, zero, RngChkExit // IndRngFail: // ... // RngChkExit: @@ -7268,7 +7390,9 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) + { offset += base->AsLclFld()->GetLclOffs(); + } emit->emitIns_R_S(ins, size, dst, base->AsLclVarCommon()->GetLclNum(), offset); } else @@ -7294,7 +7418,9 @@ void CodeGen::genCall(GenTreeCall* call) argNode = argNode->gtSkipReloadOrCopy(); if (curArgTabEntry->GetRegNum() == REG_STK) + { continue; + } // Deal with multi register passed struct args. if (argNode->OperGet() == GT_FIELD_LIST) @@ -7711,7 +7837,9 @@ void CodeGen::genJmpMethod(GenTree* jmp) // If we need to generate a tail call profiler hook, then spill all // arg regs to free them up for the callback. if (!compiler->compIsProfilerHookNeeded() && (varDsc->GetRegNum() == varDsc->GetArgReg())) + { continue; + } } else if (varDsc->GetRegNum() == REG_STK) { @@ -7763,7 +7891,9 @@ void CodeGen::genJmpMethod(GenTree* jmp) // Skip if arg not passed in a register. 
if (!varDsc->lvIsRegArg) + { continue; + } // Register argument noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); @@ -8057,9 +8187,13 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) default: assert(desc.ExtendKind() == GenIntCastDesc::COPY); if (srcType == TYP_INT) + { emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, dstReg, srcReg, 0); + } else + { emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, dstReg, srcReg, 0); + } break; } } @@ -8161,7 +8295,9 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, if (compiler->info.compFlags & CORINFO_FLG_SYNCH) { if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + { preservedAreaSize += REGSIZE_BYTES; + } preservedAreaSize += 1; // bool for synchronized methods } @@ -8582,15 +8718,26 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( // maybe optimize // ins = (instruction)(ins^((ins != INS_beq)+(ins != INS_bne))); if (ins == INS_blt) + { ins = INS_bge; + } else if (ins == INS_bltu) + { ins = INS_bgeu; + } else if (ins == INS_bge) + { ins = INS_blt; + } else if (ins == INS_bgeu) + { ins = INS_bltu; + } else + { ins = ins == INS_beq ? INS_bne : INS_beq; + } + if (addr == nullptr) { callType = emitter::EC_INDIR_R; @@ -8626,7 +8773,9 @@ inline void CodeGen::genJumpToThrowHlpBlk_la( ssize_t imm = 5 << 2; if (compiler->opts.compReloc) + { imm = 3 << 2; + } emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, reg2, imm); } @@ -8714,7 +8863,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // On LA we push the FP (frame-pointer) here along with all other callee saved registers if (isFramePointerUsed()) + { rsPushRegs |= RBM_FPBASE; + } // // It may be possible to skip pushing/popping ra for leaf methods. 
However, such optimization would require @@ -9016,7 +9167,9 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe #ifdef DEBUG if (compiler->opts.disAsm) + { printf("DEBUG: LOONGARCH64, frameType:%d\n\n", frameType); + } #endif if (frameType == 1) { @@ -9181,10 +9334,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) else { outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - // if (outSzAligned > 0) - { - genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); - } + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); } regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. @@ -9360,7 +9510,9 @@ void CodeGen::genFnPrologCalleeRegArgs() psiMoveToReg(varNum); #endif // USING_SCOPE_INFO if (!varDsc->lvLiveInOutOfHndlr) + { continue; + } } // When we have a promoted struct we have two possible LclVars that can represent the incoming argument @@ -9382,13 +9534,17 @@ void CodeGen::genFnPrologCalleeRegArgs() { storeType = varDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; } - else // if (emitter::isGeneralRegister(varDsc->GetArgReg())) + else { assert(emitter::isGeneralRegister(varDsc->GetArgReg())); if (varDsc->lvIs4Field1) + { storeType = TYP_INT; + } else + { storeType = varDsc->GetLayout()->GetGCPtrType(0); + } } slotSize = (unsigned)emitActualTypeSize(storeType); @@ -9416,7 +9572,9 @@ void CodeGen::genFnPrologCalleeRegArgs() noway_assert(varDsc->lvRefCnt() == 0); regArgMaskLive &= ~genRegMask(varDsc->GetArgReg()); if (varDsc->GetOtherArgReg() < REG_STK) + { regArgMaskLive &= ~genRegMask(varDsc->GetOtherArgReg()); + } } else { @@ -9464,13 +9622,19 @@ void CodeGen::genFnPrologCalleeRegArgs() { baseOffset = (int)EA_SIZE(slotSize); if (varDsc->lvIs4Field2) + { storeType = TYP_INT; + } else + { storeType = varDsc->GetLayout()->GetGCPtrType(1); - size = emitActualTypeSize(storeType); + } + size = emitActualTypeSize(storeType); if (baseOffset < (int)EA_SIZE(size)) + { baseOffset = (int)EA_SIZE(size); - srcRegNum = varDsc->GetOtherArgReg(); + } + srcRegNum = varDsc->GetOtherArgReg(); } if (srcRegNum == varDsc->GetOtherArgReg()) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 126c74bdbefa7..28d52d5e0bd38 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -597,7 +597,7 @@ class emitter // TODO-LoongArch64: not include SIMD-vector. 
static_assert_no_msg(INS_count <= 512); instruction _idIns : 9; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) +#else static_assert_no_msg(INS_count <= 256); instruction _idIns : 8; #endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c0b9c1e0cce58..c4d14d2f2006d 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1048,14 +1048,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2Type, 1); varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 1 : 0; - varDscInfo->hasMultiSlotStruct = true; + varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 1 : 0; } else if (cSlots > 1) { varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); - varDscInfo->hasMultiSlotStruct = true; varDscInfo->setAllRegArgUsed(arg1Type); varDscInfo->stackArgSize += TARGET_POINTER_SIZE; } @@ -1066,7 +1064,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un if (cSlots == 2) { varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL)); - varDscInfo->hasMultiSlotStruct = true; } } } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 207c79864470e..fa2a28179dd48 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1189,7 +1189,24 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) switch (blkNode->gtBlkOpKind) { case GenTreeBlk::BlkOpKindUnroll: - break; + { + if (dstAddr->isContained()) + { + // Since the dstAddr is contained the address will be computed in CodeGen. + // This might require an integer register to store the value. 
+ buildInternalIntRegisterDefForNode(blkNode); + } + + const bool isDstRegAddrAlignmentKnown = dstAddr->OperIsLocalAddr(); + + if (isDstRegAddrAlignmentKnown && (size > FP_REGSIZE_BYTES)) + { + // TODO-LoongArch64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions. + // here just used a temp register. + buildInternalFloatRegisterDefForNode(blkNode); + } + } + break; case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); @@ -1222,7 +1239,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (size >= 2 * REGSIZE_BYTES) { // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance - // so we need to reserve an extra internal register + // so we need to reserve an extra internal register. buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); } @@ -1243,8 +1260,27 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) switch (blkNode->gtBlkOpKind) { case GenTreeBlk::BlkOpKindUnroll: + { buildInternalIntRegisterDefForNode(blkNode); - break; + + const bool isSrcAddrLocal = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) || + ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr()); + const bool isDstAddrLocal = dstAddr->OperIsLocalAddr(); + + // TODO-LoongArch64: using 16-byte SIMD instructions. + const bool srcAddrMayNeedReg = + isSrcAddrLocal || ((srcAddrOrFill != nullptr) && srcAddrOrFill->isContained()); + const bool dstAddrMayNeedReg = isDstAddrLocal || dstAddr->isContained(); + + // The following allocates an additional integer register in a case + // when a load instruction and a store instruction cannot be encoded using offset + // from a corresponding base register. 
+ if (srcAddrMayNeedReg && dstAddrMayNeedReg) + { + buildInternalIntRegisterDefForNode(blkNode); + } + } + break; case GenTreeBlk::BlkOpKindHelper: dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 6b89ad3c91c38..b95293dcacf7c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -214,8 +214,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) { if (!tree->gtOverflow()) { -#if defined(TARGET_ARM64) || \ - defined(TARGET_LOONGARCH64) // On ARM64 All non-overflow checking conversions can be optimized +// ARM64 and LoongArch64 optimize all non-overflow checking conversions +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return nullptr; #else switch (dstType) @@ -243,7 +243,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) default: unreached(); } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } else { @@ -938,7 +938,6 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, if (numRegs == 2) { curArgTabEntry->setRegNum(1, otherRegNum); - // curArgTabEntry->isSplit = true; } return curArgTabEntry; @@ -2038,7 +2037,12 @@ void fgArgInfo::EvalArgsToTemps() { setupArg = compiler->fgMorphCopyBlock(setupArg); #if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_LOONGARCH64) + // For LoongArch64, the struct {float a; float b;} passed by float-registers. + if ((lclVarType == TYP_STRUCT) && (curArgTabEntry->numRegs == 1)) +#else if (lclVarType == TYP_STRUCT) +#endif { // This scalar LclVar widening step is only performed for ARM architectures. 
// @@ -3016,7 +3020,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); } #else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' if (!isStructArg) { byteSize = genTypeSize(argx); @@ -3307,10 +3311,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } } - assert(!isHfaArg); // LOONGARCH not support HFA. + assert(!isHfaArg); // LoongArch64 does not support HFA. } - // if run out the fp argument register, try the int argument register. + // if we run out of floating-point argument registers, try the int argument registers. if (!isRegArg) { // Check if the last register needed is still in the int argument register range. @@ -3320,7 +3324,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Did we run out of registers when we had a 16-byte struct (size===2) ? // (i.e we only have one register remaining but we needed two registers to pass this arg) - // This prevents us from backfilling a subsequent arg into x7 // if (!isRegArg && (size > 1)) { @@ -3329,7 +3332,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // isRegArg = intArgRegNum < maxRegArgs; // the split-struct case. 
nextOtherRegNum = REG_STK; - // assert((intArgRegNum + 1) == maxRegArgs); } } #else // not TARGET_ARM or TARGET_ARM64 or TARGET_LOONGARCH64 @@ -3915,12 +3917,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) - if ((passingSize != structSize) && (lclVar == nullptr)) - { - copyBlkClass = objClass; - } -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if ((passingSize != structSize) && (lclVar == nullptr)) { copyBlkClass = objClass; @@ -4136,12 +4133,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #if FEATURE_MULTIREG_ARGS if (isStructArg) { -#if defined(TARGET_LOONGARCH64) - if ((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) -#else if (((argEntry->numRegs + argEntry->GetStackSlotsNumber()) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT)) -#endif { hasMultiregStructArgs = true; } @@ -4373,28 +4366,6 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) if ((size > 1) || (fgEntryPtr->IsHfaArg() && argx->TypeGet() == TYP_STRUCT)) { foundStructArg = true; -#if defined(TARGET_LOONGARCH64) - if (!argx->OperIs(GT_FIELD_LIST)) - { - GenTree* newArgx = fgMorphMultiregStructArg(argx, fgEntryPtr); - - // Did we replace 'argx' with a new tree? - if (newArgx != argx) - { - // link the new arg node into either the late arg list or the gtCallArgs list - if (isLateArg) - { - lateUse->SetNode(newArgx); - } - else - { - use.SetNode(newArgx); - } - - assert(fgEntryPtr->GetNode() == newArgx); - } - } -#else if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) { if (fgEntryPtr->IsHfaRegArg()) @@ -4444,7 +4415,6 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) assert(fgEntryPtr->GetNode() == newArgx); } } -#endif } } @@ -4479,13 +4449,10 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // this also forces the struct to be stack allocated into the local frame. 
// For the GT_OBJ case will clone the address expression and generate two (or more) // indirections. -// Currently the implementation handles ARM64/ARM and will NYI for other architectures. // GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntryPtr) { -#if !defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(arg->TypeGet())); -#endif #if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); @@ -4536,40 +4503,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } #if FEATURE_MULTIREG_ARGS -// Examine 'arg' and setup argValue objClass and structSize -// -#if defined(TARGET_LOONGARCH64) - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); - if (objClass == NO_CLASS_HANDLE) - { - assert(arg->TypeGet() != TYP_STRUCT); - assert(arg->OperGet() == GT_LCL_FLD); - assert(fgEntryPtr->numRegs == 2); - - GenTreeLclVarCommon* varNode = arg->AsLclVarCommon(); - unsigned varNum = varNode->GetLclNum(); - assert(varNum < lvaCount); - LclVarDsc* varDsc = &lvaTable[varNum]; - assert(varDsc->lvExactSize == 8); - - unsigned offset = arg->AsLclVarCommon()->GetLclOffs(); - GenTreeFieldList* newArg = nullptr; - var_types tmp_type = fgEntryPtr->isPassedInFloatRegisters() ? TYP_FLOAT : TYP_INT; - arg->gtType = tmp_type; - - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - newArg->AddField(this, arg, offset, tmp_type); - tmp_type = isValidFloatArgReg(fgEntryPtr->GetOtherRegNum()) ? 
TYP_FLOAT : TYP_INT; - GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type, offset + 4); - newArg->AddField(this, nextLclFld, offset + 4, tmp_type); - - return newArg; - } -#else - const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); -#endif - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; + // Examine 'arg' and setup argValue objClass and structSize + // + const CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(arg); + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; if (arg->TypeGet() != TYP_STRUCT) { @@ -4724,7 +4662,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #endif // DEBUG #ifndef UNIX_AMD64_ABI -#if !defined(TARGET_LOONGARCH64) // This local variable must match the layout of the 'objClass' type exactly if (varDsc->lvIsHfa() && fgEntryPtr->isPassedInFloatRegisters()) { @@ -4740,7 +4677,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } } else -#endif { #if defined(TARGET_ARM64) // We must have a 16-byte struct (non-HFA) @@ -4791,7 +4727,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #endif // !UNIX_AMD64_ABI -#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) +#if defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) // Is this LclVar a promoted struct with exactly 2 fields? if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa()) { @@ -4899,16 +4835,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); } -#elif defined(TARGET_LOONGARCH64) - // Is this LclVar a promoted struct with exactly same size? 
- assert(!varDsc->lvPromoted); - - assert(structSize >= TARGET_POINTER_SIZE); - { - // We will create a list of GT_LCL_FLDs nodes to pass this struct - lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DoNotEnregisterReason::LocalField)); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARM } // If we didn't set newarg to a new List Node tree diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 2106d3f2bc0b1..daf251b33477d 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -98,7 +98,7 @@ #define REG_CALLEE_SAVED_ORDER REG_S0,REG_S1,REG_S2,REG_S3,REG_S4,REG_S5,REG_S6,REG_S7,REG_S8 #define RBM_CALLEE_SAVED_ORDER RBM_S0,RBM_S1,RBM_S2,RBM_S3,RBM_S4,RBM_S5,RBM_S6,RBM_S7,RBM_S8 - #define CNT_CALLEE_SAVED (9) //s0-s8, not including fp,ra. + #define CNT_CALLEE_SAVED (10) //s0-s8,fp. #define CNT_CALLEE_TRASH (17) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1) From 38b91f2f47db57f161e24312247afcc527b8c268 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 17 Mar 2022 10:06:58 +0800 Subject: [PATCH 35/46] [LoongArch64] amend some code for CR. 
--- src/coreclr/gcinfo/CMakeLists.txt | 9 +- src/coreclr/jit/CMakeLists.txt | 7 +- src/coreclr/jit/codegenloongarch64.cpp | 60 +- src/coreclr/jit/emit.cpp | 245 +-- src/coreclr/jit/emit.h | 43 +- src/coreclr/jit/emitfmtsloongarch64.h | 29 +- src/coreclr/jit/emitloongarch64.cpp | 2472 ++++++++++++++---------- src/coreclr/jit/emitloongarch64.h | 18 + src/coreclr/jit/lsraloongarch64.cpp | 23 +- src/coreclr/jit/registerloongarch64.h | 2 +- 10 files changed, 1510 insertions(+), 1398 deletions(-) diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt index 5f10c54e5d9f9..34b3843d6893e 100644 --- a/src/coreclr/gcinfo/CMakeLists.txt +++ b/src/coreclr/gcinfo/CMakeLists.txt @@ -69,12 +69,15 @@ if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) create_gcinfo_lib(TARGET gcinfo_${TARGET_OS_NAME}_${ARCH_TARGET_NAME} OS ${TARGET_OS_NAME} ARCH ${ARCH_TARGET_NAME}) endif() -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) - create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_gcinfo_lib(TARGET gcinfo_universal_arm64 OS universal ARCH arm64) create_gcinfo_lib(TARGET gcinfo_unix_x64 OS unix ARCH x64) create_gcinfo_lib(TARGET gcinfo_win_x64 OS win ARCH x64) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) + +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) + create_gcinfo_lib(TARGET gcinfo_unix_loongarch64 OS unix ARCH loongarch64) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm) create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 613cba4265138..927bf7a238ac5 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ 
-576,12 +576,15 @@ install_clr(TARGETS clrjit DESTINATIONS . sharedFramework COMPONENT jit) # Enable profile guided optimization add_pgo(clrjit) -if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +if (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) create_standalone_jit(TARGET clrjit_universal_arm64_${ARCH_HOST_NAME} OS universal ARCH arm64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_unix_x64_${ARCH_HOST_NAME} OS unix ARCH x64 DESTINATIONS .) create_standalone_jit(TARGET clrjit_win_x64_${ARCH_HOST_NAME} OS win ARCH x64 DESTINATIONS .) +endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64) + +if (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_unix_loongarch64_${ARCH_HOST_NAME} OS unix ARCH loongarch64 DESTINATIONS .) -endif (CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64) +endif (CLR_CMAKE_TARGET_ARCH_LOONGARCH64) create_standalone_jit(TARGET clrjit_universal_arm_${ARCH_HOST_NAME} OS universal ARCH arm DESTINATIONS .) target_compile_definitions(clrjit_universal_arm_${ARCH_HOST_NAME} PRIVATE ARM_SOFTFP CONFIGURABLE_ARM_ABI) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 07c876496da57..260b54ee15000 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -21,24 +21,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -// Returns true if 'value' is a legal signed immediate 12 bit encoding. -static bool isValidSimm12(ssize_t value) -{ - return -(((int)1) << 11) <= value && value < (((int)1) << 11); -} - -// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. -static bool isValidUimm11(ssize_t value) -{ - return (0 == (value >> 11)); -} - -// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. 
-static bool isValidUimm12(ssize_t value) -{ - return (0 == (value >> 12)); -} - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -114,7 +96,7 @@ bool CodeGen::genInstrWithConstant(instruction ins, break; } #endif - bool immFitsInIns = isValidSimm12(imm); + bool immFitsInIns = emitter::isValidSimm12(imm); if (immFitsInIns) { @@ -1653,7 +1635,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); assert(untrLclLo % 4 == 0); - if (isValidSimm12(untrLclLo)) + if (emitter::isValidSimm12(untrLclLo)) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo); } @@ -2432,7 +2414,7 @@ void CodeGen::genLclHeap(GenTree* tree) lastTouchDelta = amount; imm = -(ssize_t)amount; - if (isValidSimm12(imm)) + if (emitter::isValidSimm12(imm)) { emit->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); } @@ -4297,11 +4279,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) if (tree->OperIs(GT_LT)) { - if (!IsUnsigned && isValidSimm12(imm)) + if (!IsUnsigned && emitter::isValidSimm12(imm)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) + else if (IsUnsigned && emitter::isValidUimm11(imm)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } @@ -4313,11 +4295,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_LE)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm + 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm + 1); } @@ -4329,12 +4311,12 @@ void 
CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_GT)) { - if (!IsUnsigned && isValidSimm12(imm + 1)) + if (!IsUnsigned && emitter::isValidSimm12(imm + 1)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); } - else if (IsUnsigned && isValidUimm11(imm + 1)) + else if (IsUnsigned && emitter::isValidUimm11(imm + 1)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, REG_RA, regOp1, imm + 1); emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, REG_RA, 1); @@ -4347,11 +4329,11 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else if (tree->OperIs(GT_GE)) { - if (!IsUnsigned && isValidSimm12(imm)) + if (!IsUnsigned && emitter::isValidSimm12(imm)) { emit->emitIns_R_R_I(INS_slti, EA_PTRSIZE, targetReg, regOp1, imm); } - else if (IsUnsigned && isValidUimm11(imm)) + else if (IsUnsigned && emitter::isValidUimm11(imm)) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, imm); } @@ -4368,7 +4350,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, regOp1); } - else if (isValidUimm12(imm)) + else if (emitter::isValidUimm12(imm)) { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, targetReg, REG_R0, targetReg); @@ -4386,7 +4368,7 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, regOp1, 1); } - else if (isValidUimm12(imm)) + else if (emitter::isValidUimm12(imm)) { emit->emitIns_R_R_I(INS_xori, EA_PTRSIZE, targetReg, regOp1, imm); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); @@ -5414,7 +5396,7 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // function that does a probe, which will in turn call this function. 
assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); - if (isValidSimm12(spDelta)) + if (emitter::isValidSimm12(spDelta)) { GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta); } @@ -8106,7 +8088,7 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d if (castMinValue != 0) { - if (isValidSimm12(castMinValue)) + if (emitter::isValidSimm12(castMinValue)) { GetEmitter()->emitIns_R_R_I(INS_slti, EA_ATTR(desc.CheckSrcSize()), REG_R21, reg, castMinValue); } @@ -8427,7 +8409,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) if (offset != 0) { - if (isValidSimm12(offset)) + if (emitter::isValidSimm12(offset)) { emit->emitIns_R_R_I(INS_addi_d, size, tmpReg, tmpReg, offset); } @@ -8451,7 +8433,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) { GenTree* memBase = lea->Base(); - if (isValidSimm12(offset)) + if (emitter::isValidSimm12(offset)) { if (offset != 0) { @@ -8508,7 +8490,7 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) } else { - assert(isValidSimm12(delta)); + assert(emitter::isValidSimm12(delta)); GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta); } @@ -9589,7 +9571,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += baseOffset; - if (isValidSimm12(base)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9641,7 +9623,7 @@ void CodeGen::genFnPrologCalleeRegArgs() { base += baseOffset; - if (isValidSimm12(base)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset); } @@ -9672,7 +9654,7 @@ void CodeGen::genFnPrologCalleeRegArgs() base += 8; GetEmitter()->emitIns_R_R_Imm(INS_ld_d, size, REG_SCRATCH, REG_SPBASE, genTotalFrameSize()); - if (isValidSimm12(base)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_S_R(INS_st_d, size, REG_SCRATCH, varNum, 
baseOffset); } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 30c28972ac01d..55d517f996d1d 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -743,12 +743,8 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; -// We'll allocate extra space (DWORD aligned) to record the GC regs -#if defined(TARGET_LOONGARCH64) - gs += sizeof(regMaskTP); -#else + // We'll allocate extra space (DWORD aligned) to record the GC regs gs += sizeof(int); -#endif } // Allocate space for the instructions and optional liveset @@ -759,13 +755,8 @@ insGroup* emitter::emitSavIG(bool emitAdd) if (ig->igFlags & IGF_BYREF_REGS) { -// Record the byref regs in front the of the instructions - -#if defined(TARGET_LOONGARCH64) - *castto(id, regMaskTP*)++ = emitInitByrefRegs; -#else + // Record the byref regs in front the of the instructions *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs; -#endif } // Do we need to store the liveset? @@ -1310,7 +1301,8 @@ weight_t emitter::getCurrentBlockWeight() #if defined(TARGET_LOONGARCH64) void emitter::dispIns(instrDesc* id) { - assert(!"Not used on LOONGARCH64."); + // For LoongArch64 using the emitDisInsName(). 
+ NYI_LOONGARCH64("Not used on LOONGARCH64."); } #else void emitter::dispIns(instrDesc* id) @@ -3022,11 +3014,9 @@ void emitter::emitGenerateUnwindNop(instrDesc* id, void* context) Compiler* comp = (Compiler*)context; #if defined(TARGET_ARM) comp->unwindNop(id->idCodeSize()); -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) comp->unwindNop(); -#elif defined(TARGET_LOONGARCH64) - comp->unwindNop(); -#endif // defined(TARGET_LOONGARCH64) +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } /***************************************************************************** @@ -4111,8 +4101,10 @@ void emitter::emitDispCommentForHandle(size_t handle, GenTreeFlags flag) * ARM64 has a small and large encoding for both conditional branch and loading label addresses. * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently * NYI). + * LoongArch64 has an individual implementation for emitJumpDistBind(). */ +#ifndef TARGET_LOONGARCH64 void emitter::emitJumpDistBind() { #ifdef DEBUG @@ -4146,22 +4138,6 @@ void emitter::emitJumpDistBind() int jmp_iteration = 1; -#ifdef TARGET_LOONGARCH64 - // NOTE: - // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; - // bit1 of isLinkingEnd_LA: indicating not needed updating ths size while emitTotalCodeSize <= (0x7fff << 2) or had - // updated; - unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; - - UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. - // small jump max. neg distance - NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; - // small jump max. pos distance - NATIVE_OFFSET psd = - B_DIST_SMALL_MAX_POS - - emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
-#endif - /*****************************************************************************/ /* If we iterate to look for more jumps to shorten, we start again here. */ /*****************************************************************************/ @@ -4198,11 +4174,9 @@ void emitter::emitJumpDistBind() UNATIVE_OFFSET jsz; // size of the jump instruction in bytes -#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET ssz = 0; // small jump size NATIVE_OFFSET nsd = 0; // small jump max. neg distance NATIVE_OFFSET psd = 0; // small jump max. pos distance -#endif #if defined(TARGET_ARM) UNATIVE_OFFSET msz = 0; // medium jump size @@ -4217,10 +4191,8 @@ void emitter::emitJumpDistBind() // offset of the jump UNATIVE_OFFSET dstOffs; NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded -#ifndef TARGET_LOONGARCH64 UNATIVE_OFFSET oldSize; UNATIVE_OFFSET sizeDif; -#endif #ifdef TARGET_XARCH assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL); @@ -4323,14 +4295,7 @@ void emitter::emitJumpDistBind() /* Make sure the jumps are properly ordered */ #ifdef DEBUG -#if defined(TARGET_LOONGARCH64) -#if defined(UNALIGNED_CHECK_DISABLE) - UNALIGNED_CHECK_DISABLE; -#endif - assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < (jmp->idjOffs + adjLJ)); -#else assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs); -#endif lastLJ = (lastIG == jmp->idjIG) ? 
jmp : nullptr; assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || @@ -4364,19 +4329,11 @@ void emitter::emitJumpDistBind() if (EMITVERBOSE) { printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, -#if defined(TARGET_LOONGARCH64) - lstIG->igOffs + adjIG -#else - lstIG->igOffs - adjIG -#endif - ); + lstIG->igOffs - adjIG); } #endif // DEBUG -#if defined(TARGET_LOONGARCH64) - lstIG->igOffs += adjIG; -#else + lstIG->igOffs -= adjIG; -#endif assert(IsCodeAligned(lstIG->igOffs)); } while (lstIG != jmpIG); } @@ -4387,13 +4344,9 @@ void emitter::emitJumpDistBind() lstIG = jmpIG; } -/* Apply any local size adjustment to the jump's relative offset */ + /* Apply any local size adjustment to the jump's relative offset */ -#if defined(TARGET_LOONGARCH64) - jmp->idjOffs += adjLJ; -#else jmp->idjOffs -= adjLJ; -#endif // If this is a jump via register, the instruction size does not change, so we are done. CLANG_FORMAT_COMMENT_ANCHOR; @@ -4441,9 +4394,8 @@ void emitter::emitJumpDistBind() if (jmp->idjShort) { -#ifndef TARGET_LOONGARCH64 assert(jmp->idCodeSize() == ssz); -#endif + // We should not be jumping/branching across funclets/functions emitCheckFuncletBranch(jmp, jmpIG); @@ -4547,17 +4499,13 @@ void emitter::emitJumpDistBind() if (jmpIG->igNum < tgtIG->igNum) { -/* Forward jump */ + /* Forward jump */ -/* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between - here and the target could be shortened, causing the actual distance to shrink. - */ + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. 
+ */ -#if defined(TARGET_LOONGARCH64) - dstOffs += adjIG; -#else dstOffs -= adjIG; -#endif /* Compute the distance estimate */ @@ -4592,67 +4540,11 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT -#if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0); // Forward jump - assert(!(jmpDist & 0x3)); - - if (isLinkingEnd_LA & 0x2) - { - jmp->idAddr()->iiaSetJmpOffset(jmpDist); - } - else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) - { - instruction ins = jmp->idIns(); - assert((INS_bceqz <= ins) && (ins <= INS_bl)); - - if (ins < - INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. - { - if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) - { - extra = 4; - } - else - { - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); // TODO:later will be deleted!!! - extra = 8; - } - } - else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. - { - if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) - continue; - - extra = 4; - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - } - else // if (ins == INS_b || ins == INS_bl) - { - assert(ins == INS_b || ins == INS_bl); - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - continue; - } - - jmp->idInsOpt(INS_OPTS_JIRL); - jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
- adjLJ += (UNATIVE_OFFSET)extra; - adjIG += (UNATIVE_OFFSET)extra; - emitTotalCodeSize += (UNATIVE_OFFSET)extra; - jmpIG->igFlags |= IGF_UPD_ISZ; - isLinkingEnd_LA |= 0x1; - } - continue; - -#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } -#endif } else { @@ -4691,70 +4583,13 @@ void emitter::emitJumpDistBind() } #endif // DEBUG_EMIT -#if defined(TARGET_LOONGARCH64) - assert(jmpDist >= 0); // Backward jump - assert(!(jmpDist & 0x3)); - - if (isLinkingEnd_LA & 0x2) - { - jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! - } - else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) - { - instruction ins = jmp->idIns(); - assert((INS_bceqz <= ins) && (ins <= INS_bl)); - - if (ins < - INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. - { - if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) - { - extra = 4; - } - else - { - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - extra = 8; - } - } - else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. - { - if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) - continue; - - extra = 4; - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - } - else - { - assert(ins == INS_b || ins == INS_bl); - // assert((emitTotalCodeSize + emitCounts_INS_OPTS_J*4) < 0x8000000); - assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); - continue; - } - - jmp->idInsOpt(INS_OPTS_JIRL); - jmp->idCodeSize(jmp->idCodeSize() + extra); - jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
- adjLJ += (UNATIVE_OFFSET)extra; - adjIG += (UNATIVE_OFFSET)extra; - emitTotalCodeSize += (UNATIVE_OFFSET)extra; - jmpIG->igFlags |= IGF_UPD_ISZ; - isLinkingEnd_LA |= 0x1; - } - continue; - -#else // not defined(TARGET_LOONGARCH64) if (extra <= 0) { /* This jump will be a short one */ goto SHORT_JMP; } -#endif } -#ifndef TARGET_LOONGARCH64 /* We arrive here if the jump couldn't be made short, at least for now */ /* We had better not have eagerly marked the jump as short @@ -4946,40 +4781,8 @@ void emitter::emitJumpDistBind() jmpIG->igFlags |= IGF_UPD_ISZ; -#endif // not defined(TARGET_LOONGARCH64) - } // end for each jump - -#if defined(TARGET_LOONGARCH64) - if ((isLinkingEnd_LA & 0x3) < 0x2) - { - // indicating had updated the instrDescJmp's size with the type INS_OPTS_J. - isLinkingEnd_LA = 0x2; - // emitRecomputeIGoffsets(); - /* Adjust offsets of any remaining blocks */ - - for (; lstIG;) - { - lstIG = lstIG->igNext; - if (!lstIG) - { - break; - } -#ifdef DEBUG - if (EMITVERBOSE) - { - printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, - lstIG->igOffs + adjIG); - } -#endif // DEBUG + } // end for each jump - lstIG->igOffs += adjIG; - - assert(IsCodeAligned(lstIG->igOffs)); - } - goto AGAIN; - } - -#else /* Did we shorten any jumps? */ if (adjIG) @@ -5043,7 +4846,6 @@ void emitter::emitJumpDistBind() goto AGAIN; } } -#endif #ifdef DEBUG if (EMIT_INSTLIST_VERBOSE) @@ -5055,6 +4857,7 @@ void emitter::emitJumpDistBind() emitCheckIGoffsets(); #endif // DEBUG } +#endif #if FEATURE_LOOP_ALIGN @@ -5866,7 +5669,7 @@ emitter::instrDescAlign* emitter::emitAlignInNextIG(instrDescAlign* alignInstr) void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG) { #ifdef TARGET_LOONGARCH64 - /* TODO: for LOONGARCH64: not support idDebugOnlyInfo.*/ + // TODO-LoongArch64: support idDebugOnlyInfo. 
return; #else @@ -7045,13 +6848,7 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ -#if defined(TARGET_LOONGARCH64) -unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) -{ - assert(!"unimplemented yet on LOONGARCH"); - return -1; -} -#else +#ifndef TARGET_LOONGARCH64 unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) { instrDesc* id = (instrDesc*)ig->igData; @@ -9532,7 +9329,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) result = RBM_CALLEE_TRASH_NOGC & ~(RBM_RDI | RBM_RSI); break; #elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) - result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; + result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; #else assert(!"unknown arch"); diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 28d52d5e0bd38..c64a67192b645 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -344,11 +344,8 @@ struct insGroup ptr -= sizeof(VARSET_TP); } -#if defined(TARGET_LOONGARCH64) - ptr -= sizeof(VARSET_TP); -#else ptr -= sizeof(unsigned); -#endif + return *(unsigned*)ptr; } @@ -677,7 +674,7 @@ class emitter // At this point we have fully consumed first DWORD so that next field // doesn't cross a byte boundary. #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) -/* _idOpSize defined bellow. */ +/* _idOpSize defined below. 
*/ #else opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8 #endif // ARM || TARGET_LOONGARCH64 @@ -1793,11 +1790,13 @@ class emitter #endif // FEATURE_EH_FUNCLETS - /************************************************************************/ - /* Methods to record a code position and later convert to offset */ - /************************************************************************/ +/************************************************************************/ +/* Methods to record a code position and later convert to offset */ +/************************************************************************/ +#ifndef TARGET_LOONGARCH64 unsigned emitFindInsNum(insGroup* ig, instrDesc* id); +#endif UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum); /************************************************************************/ @@ -1957,7 +1956,7 @@ class emitter // CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // ARM32 and ARM64 both can require a bigger prolog instruction group. One scenario is where // a function uses all the incoming integer and single-precision floating-point arguments, // and must store them all to the frame on entry. If the frame is very large, we generate @@ -1972,13 +1971,9 @@ class emitter // which eats up our insGroup buffer. 
#define SC_IG_BUFFER_SIZE (200 * sizeof(emitter::instrDesc)) -#elif defined(TARGET_LOONGARCH64) - -#define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 20 * SMALL_IDSC_SIZE) - -#else // !TARGET_LOONGARCH64 +#else #define SC_IG_BUFFER_SIZE (50 * sizeof(emitter::instrDesc) + 14 * SMALL_IDSC_SIZE) -#endif // !TARGET_LOONGARCH64 +#endif // !(TARGET_ARMARCH || TARGET_LOONGARCH64) size_t emitIGbuffSize; @@ -2160,7 +2155,7 @@ class emitter const char* emitLabelString(insGroup* ig); #endif -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); @@ -2174,19 +2169,7 @@ class emitter static void emitGenerateUnwindNop(instrDesc* id, void* context); -#elif defined(TARGET_LOONGARCH64) - void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt); - bool emitGetLocationInfo(emitLocation* emitLoc, insGroup** pig, instrDesc** pid, int* pinsRemaining = NULL); - - bool emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining); - - typedef void (*emitProcessInstrFunc_t)(instrDesc* id, void* context); - - void emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context); - - static void emitGenerateUnwindNop(instrDesc* id, void* context); - -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef TARGET_X86 void emitMarkStackLvl(unsigned stackLevel); @@ -2356,8 +2339,10 @@ class emitter static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind); #ifdef DEBUG +#ifndef TARGET_LOONGARCH64 void emitInsSanityCheck(instrDesc* id); #endif +#endif #ifdef TARGET_ARMARCH // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC diff --git a/src/coreclr/jit/emitfmtsloongarch64.h b/src/coreclr/jit/emitfmtsloongarch64.h index 2f47160ac8d39..3dab2b7dc2704 100644 --- a/src/coreclr/jit/emitfmtsloongarch64.h +++ b/src/coreclr/jit/emitfmtsloongarch64.h @@ -2,6 +2,8 @@ // The .NET Foundation 
licenses this file to you under the MIT license. ////////////////////////////////////////////////////////////////////////////// +// define this file for LoongArch64 just for avoiding compiling errors. +// This is moot right now. // clang-format off #if !defined(TARGET_LOONGARCH64) @@ -33,32 +35,7 @@ enum ID_OPS // (unused) ////////////////////////////////////////////////////////////////////////////// -IF_DEF(NONE, IS_NONE, NONE) // - -IF_DEF(OPCODE, IS_NONE, NONE) -IF_DEF(OPCODES_16, IS_NONE, NONE) -IF_DEF(OP_FMT, IS_NONE, NONE) -IF_DEF(OP_FMT_16, IS_NONE, NONE) -IF_DEF(OP_FMTS_16, IS_NONE, NONE) -IF_DEF(FMT_FUNC, IS_NONE, NONE) -IF_DEF(FMT_FUNC_6, IS_NONE, NONE) -IF_DEF(FMT_FUNC_16, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_6, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_16, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_6A, IS_NONE, NONE) -IF_DEF(FMT_FUNCS_11A, IS_NONE, NONE) -IF_DEF(FUNC, IS_NONE, NONE) -IF_DEF(FUNC_6, IS_NONE, NONE) -IF_DEF(FUNC_16, IS_NONE, NONE) -IF_DEF(FUNC_21, IS_NONE, NONE) -IF_DEF(FUNCS_6, IS_NONE, NONE) -IF_DEF(FUNCS_6A, IS_NONE, NONE) -IF_DEF(FUNCS_6B, IS_NONE, NONE) -IF_DEF(FUNCS_6C, IS_NONE, NONE) -IF_DEF(FUNCS_6D, IS_NONE, NONE) -IF_DEF(FUNCS_6E, IS_NONE, NONE) -IF_DEF(FUNCS_11, IS_NONE, NONE) - +IF_DEF(NONE, IS_NONE, NONE) ////////////////////////////////////////////////////////////////////////////// #undef IF_DEF diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index eb0b8659632bb..c7ba9f4a55ffe 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -384,60 +384,6 @@ const emitJumpKind emitReverseJumpKinds[] = { #include "emitjmps.h" }; -/***************************************************************************** - * The macro define for instructions. 
- */ - -#define D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd or fd or hint */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((op3_imm)&0xfff) << 10 - -#define D_INST_add_d(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ - -#define D_INST_3R(op0_code, op1_reg, op2_reg, op3_reg) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((code_t)(op3_reg)) << 10 /* rk */ - -#define D_INST_JIRL(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((code_t)(op2_reg)) << 5; /* rj */ \ - op0_code |= ((op3_imm)&0xffff) << 10 /* offs */ - -#define D_INST_lu12i_w(op0_code, op1_reg, op2_imm) \ - op0_code |= ((code_t)(op1_reg)); /* rd */ \ - op0_code |= ((op2_imm)&0xfffff) << 5 /* si20 */ - -#define D_INST_lu32i_d(op0_code, op1_reg, op2_imm) D_INST_lu12i_w(op0_code, op1_reg, op2_imm) - -#define D_INST_lu52i_d(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) - -#define D_INST_ori(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) - -// Load or Store instructions. 
-#define D_INST_LS(op0_code, op1_reg, op2_reg, op3_imm) D_INST_2RI12(op0_code, op1_reg, op2_reg, op3_imm) - -#define D_INST_Bcond(op0_code, op1_reg, op2_reg, op3_imm) \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ - op0_code |= ((code_t)(op2_reg) /*& 0x1f */); /* rd */ \ - assert(!((code_t)(op3_imm)&0x3)); \ - op0_code |= (((code_t)(op3_imm) << 8) & 0x3fffc00) /* offset */ - -#define D_INST_Bcond_Z(op0_code, op1_reg, op1_imm) \ - assert(!((code_t)(op1_imm)&0x3)); \ - op0_code |= ((code_t)(op1_reg) /*& 0x1f */) << 5; /* rj */ \ - op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00); \ - op0_code |= (((code_t)(op1_imm) >> 18) & 0x1f) /* offset */ - -#define D_INST_B(op0_code, op1_imm) \ - assert(!((code_t)(op1_imm)&0x3)); \ - op0_code |= (((code_t)(op1_imm) >> 18) & 0x3ff); \ - op0_code |= (((code_t)(op1_imm) << 8) & 0x3fffc00) /* offset */ - /***************************************************************************** * Look up the instruction for a jump kind */ @@ -500,7 +446,6 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) assert(!id->idIsLargeCns()); return sizeof(instrDesc); } - // break; case INS_OPTS_I: case INS_OPTS_RC: @@ -514,54 +459,10 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) } } -#ifdef DEBUG -/***************************************************************************** - * - * The following called for each recorded instruction -- use for debugging. - */ -void emitter::emitInsSanityCheck(instrDesc* id) -{ - /* What instruction format have we got? 
*/ - - switch (id->idInsFmt()) - { - case IF_OPCODE: - case IF_OPCODES_16: - case IF_OP_FMT: - case IF_OP_FMT_16: - case IF_OP_FMTS_16: - case IF_FMT_FUNC: - case IF_FMT_FUNC_6: - case IF_FMT_FUNC_16: - case IF_FMT_FUNCS_6: - case IF_FMT_FUNCS_16: - case IF_FMT_FUNCS_6A: - case IF_FMT_FUNCS_11A: - case IF_FUNC: - case IF_FUNC_6: - case IF_FUNC_16: - case IF_FUNC_21: - case IF_FUNCS_6: - case IF_FUNCS_6A: - case IF_FUNCS_6B: - case IF_FUNCS_6C: - case IF_FUNCS_6D: - case IF_FUNCS_11: - // case IF_LA: - break; - - default: - printf("unexpected format %s\n", emitIfName(id->idInsFmt())); - assert(!"Unexpected format"); - break; - } -} -#endif // DEBUG - inline bool emitter::emitInsMayWriteToGCReg(instruction ins) { assert(ins != INS_invalid); - ////NOTE: please reference the file "instrsloongarch64.h" for details !!! + // NOTE: please reference the file "instrsloongarch64.h" for details !!! return (INS_mov <= ins) && (ins <= INS_jirl) ? true : false; } @@ -586,38 +487,13 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) case INS_stx_w: case INS_stx_b: case INS_stx_h: - // case INS_sc_d: - // case INS_sc_w: - //// not used these instrs right now !!! 
- //#ifdef DEBUG - // case INS_stgt_b: - // case INS_stgt_h: - // case INS_stgt_w: - // case INS_stgt_d: - // case INS_stle_b: - // case INS_stle_h: - // case INS_stle_w: - // case INS_stle_d: - //#endif return true; + default: return false; } } -/*****************************************************************************/ -#ifdef DEBUG - -// clang-format off -static const char * const RegNames[] = -{ - #define REGDEF(name, rnum, mask, sname) sname, - #include "register.h" -}; -// clang-format on - -#endif // DEBUG - #define LD 1 #define ST 2 @@ -704,9 +580,8 @@ void emitter::emitIns(instruction ins) id->idIns(ins); id->idAddr()->iiaSetInstrEncode(emitInsCode(ins)); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -809,14 +684,15 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va id->idIns(ins); code_t code = emitInsCode(ins); - D_INST_2RI12(code, (reg1 & 0x1f), reg2, imm); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (code_t)(imm & 0xfff) << 10; id->idAddr()->iiaSetInstrEncode(code); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idSetIsLclVar(); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -878,7 +754,9 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va ins = INS_addi_d; } code = emitInsCode(ins); - D_INST_2RI12(code, reg1, reg2, imm); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)reg2 << 5; + code |= (imm & 0xfff) << 10; } else { @@ -891,7 +769,9 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va ins = INS_add_d; code = emitInsCode(ins); - D_INST_add_d(code, reg1, reg2, REG_RA); + code |= (code_t)reg1; + code |= (code_t)reg2 << 5; + code |= (code_t)REG_RA << 10; } else { @@ -903,8 +783,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va emitIns_R_R_R(INS_add_d, attr, REG_RA, REG_RA, reg2); imm2 = imm2 & 0x7ff; + imm3 = imm3 ? 
imm2 - imm3 : imm2; code = emitInsCode(ins); - D_INST_2RI12(code, reg1 /* & 0x1f*/, REG_RA, imm3 ? imm2 - imm3 : imm2); + code |= (code_t)reg1; + code |= (code_t)REG_RA; + code |= (code_t)(imm3 & 0xfff) << 10; } } @@ -917,9 +800,8 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va id->idAddr()->iiaSetInstrEncode(code); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idSetIsLclVar(); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -953,9 +835,8 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) id->idIns(ins); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -967,9 +848,6 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of case INS_bceqz: case INS_bcnez: break; - // case INS_: - // case INS_: - // break; default: unreached(); @@ -988,9 +866,8 @@ void emitter::emitIns_I_I(instruction ins, emitAttr attr, ssize_t cc, ssize_t of id->idIns(ins); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1032,28 +909,28 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t assert((0 <= imm) && (imm <= 7)); code |= (reg & 0x1f) << 5; // fj - code |= imm /*& 0x7*/; // cc + code |= imm; // cc break; case INS_movcf2fr: assert(isFloatReg(reg)); assert((0 <= imm) && (imm <= 7)); - code |= (reg & 0x1f); // fd - code |= (imm /*& 0x7*/) << 5; // cc + code |= (reg & 0x1f); // fd + code |= imm << 5; // cc break; case INS_movgr2cf: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg << 5; // rj - code |= imm /*& 0x7*/; // cc + code |= reg << 5; // rj + code |= imm; // cc break; case INS_movcf2gr: assert(isGeneralRegister(reg)); assert((0 <= imm) && (imm <= 7)); - code |= reg; // rd - code |= (imm /*& 0x7*/) << 5; // cc + code |= reg; // rd + code |= imm << 5; // cc break; default: unreached(); @@ -1065,9 +942,8 @@ void 
emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t id->idIns(ins); id->idReg1(reg); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1084,7 +960,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t // void emitter::emitIns_Mov( instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{ // TODO-LoongArch64: should amend for LoongArch64/LOONGARCH64. +{ assert(IsMovInstruction(ins)); if (!canSkip || (dstReg != srcReg)) @@ -1115,30 +991,38 @@ void emitter::emitIns_R_R( } else if ((INS_ext_w_b <= ins) && (ins <= INS_cpucfg)) { - // case INS_ext_w_b: - // case INS_ext_w_h: - // case INS_clo_w: - // case INS_clz_w: - // case INS_cto_w: - // case INS_ctz_w: - // case INS_clo_d: - // case INS_clz_d: - // case INS_cto_d: - // case INS_ctz_d: - // case INS_revb_2h: - // case INS_revb_4h: - // case INS_revb_2w: - // case INS_revb_d: - // case INS_revh_2w: - // case INS_revh_d: - // case INS_bitrev_4b: - // case INS_bitrev_8b: - // case INS_bitrev_w: - // case INS_bitrev_d: - // case INS_rdtimel_w: - // case INS_rdtimeh_w: - // case INS_rdtime_d: - // case INS_cpucfg: +#ifdef DEBUG + switch (ins) + { + case INS_ext_w_b: + case INS_ext_w_h: + case INS_clo_w: + case INS_clz_w: + case INS_cto_w: + case INS_ctz_w: + case INS_clo_d: + case INS_clz_d: + case INS_cto_d: + case INS_ctz_d: + case INS_revb_2h: + case INS_revb_4h: + case INS_revb_2w: + case INS_revb_d: + case INS_revh_2w: + case INS_revh_d: + case INS_bitrev_4b: + case INS_bitrev_8b: + case INS_bitrev_w: + case INS_bitrev_d: + case INS_rdtimel_w: + case INS_rdtimeh_w: + case INS_rdtime_d: + case INS_cpucfg: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --1!"); + } +#endif assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= reg1; // rd @@ -1146,8 +1030,6 @@ void emitter::emitIns_R_R( } else if 
((INS_asrtle_d == ins) || (INS_asrtgt_d == ins)) { - // case INS_asrtle_d: - // case INS_asrtgt_d: assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= reg1 << 5; // rj @@ -1155,50 +1037,58 @@ void emitter::emitIns_R_R( } else if ((INS_fabs_s <= ins) && (ins <= INS_fmov_d)) { - // case INS_fabs_s: - // case INS_fabs_d: - // case INS_fneg_s: - // case INS_fneg_d: - // case INS_fsqrt_s: - // case INS_fsqrt_d: - // case INS_frsqrt_s: - // case INS_frsqrt_d: - // case INS_frecip_s: - // case INS_frecip_d: - // case INS_flogb_s: - // case INS_flogb_d: - // case INS_fclass_s: - // case INS_fclass_d: - // case INS_fcvt_s_d: - // case INS_fcvt_d_s: - // case INS_ffint_s_w: - // case INS_ffint_s_l: - // case INS_ffint_d_w: - // case INS_ffint_d_l: - // case INS_ftint_w_s: - // case INS_ftint_w_d: - // case INS_ftint_l_s: - // case INS_ftint_l_d: - // case INS_ftintrm_w_s: - // case INS_ftintrm_w_d: - // case INS_ftintrm_l_s: - // case INS_ftintrm_l_d: - // case INS_ftintrp_w_s: - // case INS_ftintrp_w_d: - // case INS_ftintrp_l_s: - // case INS_ftintrp_l_d: - // case INS_ftintrz_w_s: - // case INS_ftintrz_w_d: - // case INS_ftintrz_l_s: - // case INS_ftintrz_l_d: - // case INS_ftintrne_w_s: - // case INS_ftintrne_w_d: - // case INS_ftintrne_l_s: - // case INS_ftintrne_l_d: - // case INS_frint_s: - // case INS_frint_d: - // case INS_fmov_s: - // case INS_fmov_d: +#ifdef DEBUG + switch (ins) + { + case INS_fabs_s: + case INS_fabs_d: + case INS_fneg_s: + case INS_fneg_d: + case INS_fsqrt_s: + case INS_fsqrt_d: + case INS_frsqrt_s: + case INS_frsqrt_d: + case INS_frecip_s: + case INS_frecip_d: + case INS_flogb_s: + case INS_flogb_d: + case INS_fclass_s: + case INS_fclass_d: + case INS_fcvt_s_d: + case INS_fcvt_d_s: + case INS_ffint_s_w: + case INS_ffint_s_l: + case INS_ffint_d_w: + case INS_ffint_d_l: + case INS_ftint_w_s: + case INS_ftint_w_d: + case INS_ftint_l_s: + case INS_ftint_l_d: + case INS_ftintrm_w_s: + case INS_ftintrm_w_d: + case 
INS_ftintrm_l_s: + case INS_ftintrm_l_d: + case INS_ftintrp_w_s: + case INS_ftintrp_w_d: + case INS_ftintrp_l_s: + case INS_ftintrp_l_d: + case INS_ftintrz_w_s: + case INS_ftintrz_w_d: + case INS_ftintrz_l_s: + case INS_ftintrz_l_d: + case INS_ftintrne_w_s: + case INS_ftintrne_w_d: + case INS_ftintrne_l_s: + case INS_ftintrne_l_d: + case INS_frint_s: + case INS_frint_d: + case INS_fmov_s: + case INS_fmov_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --2!"); + } +#endif assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); code |= (reg1 & 0x1f); // fd @@ -1206,9 +1096,17 @@ void emitter::emitIns_R_R( } else if ((INS_movgr2fr_w <= ins) && (ins <= INS_movgr2frh_w)) { - // case INS_movgr2fr_w: - // case INS_movgr2fr_d: - // case INS_movgr2frh_w: +#ifdef DEBUG + switch (ins) + { + case INS_movgr2fr_w: + case INS_movgr2fr_d: + case INS_movgr2frh_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --3!"); + } +#endif assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); code |= (reg1 & 0x1f); // fd @@ -1216,9 +1114,17 @@ void emitter::emitIns_R_R( } else if ((INS_movfr2gr_s <= ins) && (ins <= INS_movfrh2gr_s)) { - // case INS_movfr2gr_s: - // case INS_movfr2gr_d: - // case INS_movfrh2gr_s: +#ifdef DEBUG + switch (ins) + { + case INS_movfr2gr_s: + case INS_movfr2gr_d: + case INS_movfrh2gr_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif assert(isGeneralRegisterOrR0(reg1)); assert(isFloatReg(reg2)); code |= reg1; // rd @@ -1252,9 +1158,8 @@ void emitter::emitIns_R_R( id->idReg1(reg1); id->idReg2(reg2); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1270,10 +1175,19 @@ void emitter::emitIns_R_R_I( if ((INS_slli_w <= ins) && (ins <= INS_rotri_w)) { - // INS_slli_w - // INS_srli_w - // INS_srai_w - // INS_rotri_w +#ifdef DEBUG + switch (ins) + { + case INS_slli_w: + case INS_srli_w: + case INS_srai_w: + case INS_rotri_w: + 
break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --1!"); + } +#endif + assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x1f)); @@ -1284,10 +1198,18 @@ void emitter::emitIns_R_R_I( } else if ((INS_slli_d <= ins) && (ins <= INS_rotri_d)) { - // INS_slli_d - // INS_srli_d - // INS_srai_d - // INS_rotri_d +#ifdef DEBUG + switch (ins) + { + case INS_slli_d: + case INS_srli_d: + case INS_srai_d: + case INS_rotri_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --2!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((0 <= imm) && (imm <= 0x3f)); @@ -1305,34 +1227,45 @@ void emitter::emitIns_R_R_I( if (((INS_addi_w <= ins) && (ins <= INS_slti)) || ((INS_ld_b <= ins) && (ins <= INS_ld_wu)) || ((INS_st_b <= ins) && (ins <= INS_st_d))) { - // case INS_addi_w: - // case INS_addi_d: - // case INS_lu52i_d: - // case INS_slti: - // case INS_ld_b: - // case INS_ld_h: - // case INS_ld_w: - // case INS_ld_d: - // case INS_ld_bu: - // case INS_ld_hu: - // case INS_ld_wu: - // case INS_st_b: - // case INS_st_h: - // case INS_st_w: - // case INS_st_d: + switch (ins) + { + case INS_addi_w: + case INS_addi_d: + case INS_lu52i_d: + case INS_slti: + case INS_ld_b: + case INS_ld_h: + case INS_ld_w: + case INS_ld_d: + case INS_ld_bu: + case INS_ld_hu: + case INS_ld_wu: + case INS_st_b: + case INS_st_h: + case INS_st_w: + case INS_st_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --3!"); + } assert((-2048 <= imm) && (imm <= 2047)); } else if (ins == INS_sltui) { - // case INS_sltui: assert((0 <= imm) && (imm <= 0x7ff)); } else { - // case INS_andi: - // case INS_ori: - // case INS_xori: + switch (ins) + { + case INS_andi: + case INS_ori: + case INS_xori: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --4!"); + } assert((0 <= imm) && (imm <= 0xfff)); } #endif @@ -1342,10 +1275,18 @@ void 
emitter::emitIns_R_R_I( } else if ((INS_fld_s <= ins) && (ins <= INS_fst_d)) { - // INS_fld_s - // INS_fld_d - // INS_fst_s - // INS_fst_d +#ifdef DEBUG + switch (ins) + { + case INS_fld_s: + case INS_fld_d: + case INS_fst_s: + case INS_fst_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --5!"); + } +#endif assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-2048 <= imm) && (imm <= 2047)); @@ -1356,15 +1297,22 @@ void emitter::emitIns_R_R_I( } else if (((INS_ll_d >= ins) && (ins >= INS_ldptr_w)) || ((INS_sc_d >= ins) && (ins >= INS_stptr_w))) { - // INS_ldptr_w - // INS_ldptr_d - // INS_ll_w - // INS_ll_d - - // INS_stptr_w - // INS_stptr_d - // INS_sc_w - // INS_sc_d +#ifdef DEBUG + switch (ins) + { + case INS_ldptr_w: + case INS_ldptr_d: + case INS_ll_w: + case INS_ll_d: + case INS_stptr_w: + case INS_stptr_d: + case INS_sc_w: + case INS_sc_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --6!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert((-8192 <= imm) && (imm <= 8191)); @@ -1375,12 +1323,20 @@ void emitter::emitIns_R_R_I( } else if ((INS_beq <= ins) && (ins <= INS_bgeu)) { - // INS_beq - // INS_bne - // INS_blt - // INS_bltu - // INS_bge - // INS_bgeu +#ifdef DEBUG + switch (ins) + { + case INS_beq: + case INS_bne: + case INS_blt: + case INS_bltu: + case INS_bge: + case INS_bgeu: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --7!"); + } +#endif assert(isGeneralRegisterOrR0(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(!(imm & 0x3)); @@ -1392,50 +1348,58 @@ void emitter::emitIns_R_R_I( } else if ((INS_fcmp_caf_s <= ins) && (ins <= INS_fcmp_sune_s)) { - // INS_fcmp_caf_s - // INS_fcmp_cun_s - // INS_fcmp_ceq_s - // INS_fcmp_cueq_s - // INS_fcmp_clt_s - // INS_fcmp_cult_s - // INS_fcmp_cle_s - // INS_fcmp_cule_s - // INS_fcmp_cne_s - // INS_fcmp_cor_s - // INS_fcmp_cune_s - // INS_fcmp_saf_d - // INS_fcmp_sun_d - // 
INS_fcmp_seq_d - // INS_fcmp_sueq_d - // INS_fcmp_slt_d - // INS_fcmp_sult_d - // INS_fcmp_sle_d - // INS_fcmp_sule_d - // INS_fcmp_sne_d - // INS_fcmp_sor_d - // INS_fcmp_sune_d - // INS_fcmp_caf_d - // INS_fcmp_cun_d - // INS_fcmp_ceq_d - // INS_fcmp_cueq_d - // INS_fcmp_clt_d - // INS_fcmp_cult_d - // INS_fcmp_cle_d - // INS_fcmp_cule_d - // INS_fcmp_cne_d - // INS_fcmp_cor_d - // INS_fcmp_cune_d - // INS_fcmp_saf_s - // INS_fcmp_sun_s - // INS_fcmp_seq_s - // INS_fcmp_sueq_s - // INS_fcmp_slt_s - // INS_fcmp_sult_s - // INS_fcmp_sle_s - // INS_fcmp_sule_s - // INS_fcmp_sne_s - // INS_fcmp_sor_s - // INS_fcmp_sune_s +#ifdef DEBUG + switch (ins) + { + case INS_fcmp_caf_s: + case INS_fcmp_cun_s: + case INS_fcmp_ceq_s: + case INS_fcmp_cueq_s: + case INS_fcmp_clt_s: + case INS_fcmp_cult_s: + case INS_fcmp_cle_s: + case INS_fcmp_cule_s: + case INS_fcmp_cne_s: + case INS_fcmp_cor_s: + case INS_fcmp_cune_s: + case INS_fcmp_saf_d: + case INS_fcmp_sun_d: + case INS_fcmp_seq_d: + case INS_fcmp_sueq_d: + case INS_fcmp_slt_d: + case INS_fcmp_sult_d: + case INS_fcmp_sle_d: + case INS_fcmp_sule_d: + case INS_fcmp_sne_d: + case INS_fcmp_sor_d: + case INS_fcmp_sune_d: + case INS_fcmp_caf_d: + case INS_fcmp_cun_d: + case INS_fcmp_ceq_d: + case INS_fcmp_cueq_d: + case INS_fcmp_clt_d: + case INS_fcmp_cult_d: + case INS_fcmp_cle_d: + case INS_fcmp_cule_d: + case INS_fcmp_cne_d: + case INS_fcmp_cor_d: + case INS_fcmp_cune_d: + case INS_fcmp_saf_s: + case INS_fcmp_sun_s: + case INS_fcmp_seq_s: + case INS_fcmp_sueq_s: + case INS_fcmp_slt_s: + case INS_fcmp_sult_s: + case INS_fcmp_sle_s: + case INS_fcmp_sule_s: + case INS_fcmp_sne_s: + case INS_fcmp_sor_s: + case INS_fcmp_sune_s: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_I --8!"); + } +#endif assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert((0 <= imm) && (imm <= 7)); @@ -1475,9 +1439,8 @@ void emitter::emitIns_R_R_I( id->idReg1(reg1); id->idReg2(reg2); id->idAddr()->iiaSetInstrEncode(code); - 
id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1487,12 +1450,9 @@ void emitter::emitIns_R_R_I( * Also checks for a large immediate that needs a second instruction * and will load it in reg1 * -* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr -* - Requires that reg1 is a general register and not SP or ZR -* - Requires that reg1 != reg2 */ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) -{ // maybe optimize. +{ assert(isGeneralRegister(reg1)); assert(reg1 != reg2); @@ -1503,20 +1463,7 @@ void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, re { case INS_addi_w: case INS_addi_d: - // case INS_lui: - // case INS_lbu: - // case INS_lhu: - // case INS_lwu: - // case INS_lb: - // case INS_lh: - // case INS_lw: case INS_ld_d: - // case INS_sb: - // case INS_sh: - // case INS_sw: - // case INS_sd: - ////case INS_lwc1: - ////case INS_ldc1: immFits = isValidSimm12(imm); break; @@ -1560,123 +1507,131 @@ void emitter::emitIns_R_R_R( if (((INS_add_w <= ins) && (ins <= INS_crcc_w_d_w)) || ((INS_ldx_b <= ins) && (ins <= INS_ldle_d)) || ((INS_stx_b <= ins) && (ins <= INS_stle_d))) { - // case INS_add_w: - // case INS_add_d: - // case INS_sub_w: - // case INS_sub_d: - // case INS_and: - // case INS_or: - // case INS_nor: - // case INS_xor: - // case INS_andn: - // case INS_orn: - - // case INS_mul_w: - // case INS_mul_d: - // case INS_mulh_w: - // case INS_mulh_wu: - // case INS_mulh_d: - // case INS_mulh_du: - // case INS_mulw_d_w: - // case INS_mulw_d_wu: - // case INS_div_w: - // case INS_div_wu: - // case INS_div_d: - // case INS_div_du: - // case INS_mod_w: - // case INS_mod_wu: - // case INS_mod_d: - // case INS_mod_du: - - // case INS_sll_w: - // case INS_srl_w: - // case INS_sra_w: - // case INS_rotr_w: - // case INS_sll_d: - // case INS_srl_d: - // case INS_sra_d: - // case INS_rotr_d: - - // case INS_maskeqz: - // case INS_masknez: - - // case 
INS_slt: - // case INS_sltu: - - // case INS_ldx_b: - // case INS_ldx_h: - // case INS_ldx_w: - // case INS_ldx_d: - // case INS_ldx_bu: - // case INS_ldx_hu: - // case INS_ldx_wu: - // case INS_stx_b: - // case INS_stx_h: - // case INS_stx_w: - // case INS_stx_d: - - // case INS_ldgt_b: - // case INS_ldgt_h: - // case INS_ldgt_w: - // case INS_ldgt_d: - // case INS_ldle_b: - // case INS_ldle_h: - // case INS_ldle_w: - // case INS_ldle_d: - // case INS_stgt_b: - // case INS_stgt_h: - // case INS_stgt_w: - // case INS_stgt_d: - // case INS_stle_b: - // case INS_stle_h: - // case INS_stle_w: - // case INS_stle_d: - - // case INS_amswap_w: - // case INS_amswap_d: - // case INS_amswap_db_w: - // case INS_amswap_db_d: - // case INS_amadd_w: - // case INS_amadd_d: - // case INS_amadd_db_w: - // case INS_amadd_db_d: - // case INS_amand_w: - // case INS_amand_d: - // case INS_amand_db_w: - // case INS_amand_db_d: - // case INS_amor_w: - // case INS_amor_d: - // case INS_amor_db_w: - // case INS_amor_db_d: - // case INS_amxor_w: - // case INS_amxor_d: - // case INS_amxor_db_w: - // case INS_amxor_db_d: - // case INS_ammax_w: - // case INS_ammax_d: - // case INS_ammax_db_w: - // case INS_ammax_db_d: - // case INS_ammin_w: - // case INS_ammin_d: - // case INS_ammin_db_w: - // case INS_ammin_db_d: - // case INS_ammax_wu: - // case INS_ammax_du: - // case INS_ammax_db_wu: - // case INS_ammax_db_du: - // case INS_ammin_wu: - // case INS_ammin_du: - // case INS_ammin_db_wu: - // case INS_ammin_db_du: - - // case INS_crc_w_b_w: - // case INS_crc_w_h_w: - // case INS_crc_w_w_w: - // case INS_crc_w_d_w: - // case INS_crcc_w_b_w: - // case INS_crcc_w_h_w: - // case INS_crcc_w_w_w: - // case INS_crcc_w_d_w: +#ifdef DEBUG + switch (ins) + { + case INS_add_w: + case INS_add_d: + case INS_sub_w: + case INS_sub_d: + case INS_and: + case INS_or: + case INS_nor: + case INS_xor: + case INS_andn: + case INS_orn: + + case INS_mul_w: + case INS_mul_d: + case INS_mulh_w: + case INS_mulh_wu: + 
case INS_mulh_d: + case INS_mulh_du: + case INS_mulw_d_w: + case INS_mulw_d_wu: + case INS_div_w: + case INS_div_wu: + case INS_div_d: + case INS_div_du: + case INS_mod_w: + case INS_mod_wu: + case INS_mod_d: + case INS_mod_du: + + case INS_sll_w: + case INS_srl_w: + case INS_sra_w: + case INS_rotr_w: + case INS_sll_d: + case INS_srl_d: + case INS_sra_d: + case INS_rotr_d: + + case INS_maskeqz: + case INS_masknez: + + case INS_slt: + case INS_sltu: + + case INS_ldx_b: + case INS_ldx_h: + case INS_ldx_w: + case INS_ldx_d: + case INS_ldx_bu: + case INS_ldx_hu: + case INS_ldx_wu: + case INS_stx_b: + case INS_stx_h: + case INS_stx_w: + case INS_stx_d: + + case INS_ldgt_b: + case INS_ldgt_h: + case INS_ldgt_w: + case INS_ldgt_d: + case INS_ldle_b: + case INS_ldle_h: + case INS_ldle_w: + case INS_ldle_d: + case INS_stgt_b: + case INS_stgt_h: + case INS_stgt_w: + case INS_stgt_d: + case INS_stle_b: + case INS_stle_h: + case INS_stle_w: + case INS_stle_d: + + case INS_amswap_w: + case INS_amswap_d: + case INS_amswap_db_w: + case INS_amswap_db_d: + case INS_amadd_w: + case INS_amadd_d: + case INS_amadd_db_w: + case INS_amadd_db_d: + case INS_amand_w: + case INS_amand_d: + case INS_amand_db_w: + case INS_amand_db_d: + case INS_amor_w: + case INS_amor_d: + case INS_amor_db_w: + case INS_amor_db_d: + case INS_amxor_w: + case INS_amxor_d: + case INS_amxor_db_w: + case INS_amxor_db_d: + case INS_ammax_w: + case INS_ammax_d: + case INS_ammax_db_w: + case INS_ammax_db_d: + case INS_ammin_w: + case INS_ammin_d: + case INS_ammin_db_w: + case INS_ammin_db_d: + case INS_ammax_wu: + case INS_ammax_du: + case INS_ammax_db_wu: + case INS_ammax_db_du: + case INS_ammin_wu: + case INS_ammin_du: + case INS_ammin_db_wu: + case INS_ammin_db_du: + + case INS_crc_w_b_w: + case INS_crc_w_h_w: + case INS_crc_w_w_w: + case INS_crc_w_d_w: + case INS_crcc_w_b_w: + case INS_crcc_w_h_w: + case INS_crcc_w_w_w: + case INS_crcc_w_d_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R 
--1!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); @@ -1687,26 +1642,34 @@ void emitter::emitIns_R_R_R( } else if ((INS_fadd_s <= ins) && (ins <= INS_fcopysign_d)) { - // case INS_fadd_s: - // case INS_fadd_d: - // case INS_fsub_s: - // case INS_fsub_d: - // case INS_fmul_s: - // case INS_fmul_d: - // case INS_fdiv_s: - // case INS_fdiv_d: - // case INS_fmax_s: - // case INS_fmax_d: - // case INS_fmin_s: - // case INS_fmin_d: - // case INS_fmaxa_s: - // case INS_fmaxa_d: - // case INS_fmina_s: - // case INS_fmina_d: - // case INS_fscaleb_s: - // case INS_fscaleb_d: - // case INS_fcopysign_s: - // case INS_fcopysign_d: +#ifdef DEBUG + switch (ins) + { + case INS_fadd_s: + case INS_fadd_d: + case INS_fsub_s: + case INS_fsub_d: + case INS_fmul_s: + case INS_fmul_d: + case INS_fdiv_s: + case INS_fdiv_d: + case INS_fmax_s: + case INS_fmax_d: + case INS_fmin_s: + case INS_fmin_d: + case INS_fmaxa_s: + case INS_fmaxa_d: + case INS_fmina_s: + case INS_fmina_d: + case INS_fscaleb_s: + case INS_fscaleb_d: + case INS_fcopysign_s: + case INS_fcopysign_d: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --2!"); + } +#endif assert(isFloatReg(reg1)); assert(isFloatReg(reg2)); assert(isFloatReg(reg3)); @@ -1717,19 +1680,27 @@ void emitter::emitIns_R_R_R( } else if ((INS_fldx_s <= ins) && (ins <= INS_fstle_d)) { - // case INS_fldx_s: - // case INS_fldx_d: - // case INS_fstx_s: - // case INS_fstx_d: - - // case INS_fldgt_s: - // case INS_fldgt_d: - // case INS_fldle_s: - // case INS_fldle_d: - // case INS_fstgt_s: - // case INS_fstgt_d: - // case INS_fstle_s: - // case INS_fstle_d: +#ifdef DEBUG + switch (ins) + { + case INS_fldx_s: + case INS_fldx_d: + case INS_fstx_s: + case INS_fstx_d: + + case INS_fldgt_s: + case INS_fldgt_d: + case INS_fldle_s: + case INS_fldle_d: + case INS_fstgt_s: + case INS_fstgt_d: + case INS_fstle_s: + case INS_fstle_d: + break; + default: + 
NYI_LOONGARCH64("illegal ins within emitIns_R_R_R --3!"); + } +#endif assert(isFloatReg(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); @@ -1740,7 +1711,7 @@ void emitter::emitIns_R_R_R( } else { - assert(!"Unsupported instruction in emitIns_R_R_R"); + NYI_LOONGARCH64("Unsupported instruction in emitIns_R_R_R"); } instrDesc* id = emitNewInstr(attr); @@ -1750,9 +1721,8 @@ void emitter::emitIns_R_R_R( id->idReg2(reg2); id->idReg3(reg3); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1774,19 +1744,27 @@ void emitter::emitIns_R_R_R_I(instruction ins, if ((INS_alsl_w <= ins) && (ins <= INS_bytepick_w)) { - // INS_alsl_w - // INS_alsl_wu - // INS_alsl_d - // INS_bytepick_w +#ifdef DEBUG + switch (ins) + { + case INS_alsl_w: + case INS_alsl_wu: + case INS_alsl_d: + case INS_bytepick_w: + break; + default: + NYI_LOONGARCH64("illegal ins within emitIns_R_R --4!"); + } +#endif assert(isGeneralRegister(reg1)); assert(isGeneralRegisterOrR0(reg2)); assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 3)); - code |= reg1; // rd - code |= reg2 << 5; // rj - code |= reg3 << 10; // rk - code |= (imm /*& 0x3*/) << 15; // sa2 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa2 } else if (INS_bytepick_d == ins) { @@ -1795,10 +1773,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isGeneralRegisterOrR0(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= reg1; // rd - code |= reg2 << 5; // rj - code |= reg3 << 10; // rk - code |= (imm /*& 0x7*/) << 15; // sa3 + code |= reg1; // rd + code |= reg2 << 5; // rj + code |= reg3 << 10; // rk + code |= imm << 15; // sa3 } else if (INS_fsel == ins) { @@ -1807,10 +1785,10 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isFloatReg(reg3)); assert((0 <= imm) && (imm <= 7)); - code |= (reg1 & 0x1f); // fd - code |= (reg2 & 0x1f) << 5; // fj - code |= (reg3 & 0x1f) << 10; // fk 
- code |= (imm /*& 0x7*/) << 15; // ca + code |= (reg1 & 0x1f); // fd + code |= (reg2 & 0x1f) << 5; // fj + code |= (reg3 & 0x1f) << 10; // fk + code |= imm << 15; // ca } else { @@ -1824,9 +1802,8 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idReg2(reg2); id->idReg3(reg3); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1870,9 +1847,8 @@ void emitter::emitIns_R_R_I_I( id->idReg1(reg1); id->idReg2(reg2); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1886,7 +1862,6 @@ void emitter::emitIns_R_R_R_R( { code_t code = emitInsCode(ins); - //#ifdef DEBUG switch (ins) { case INS_fmadd_s: @@ -1910,16 +1885,14 @@ void emitter::emitIns_R_R_R_R( default: unreached(); } - //#endif instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idReg1(reg1); id->idAddr()->iiaSetInstrEncode(code); - id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -1945,7 +1918,7 @@ void emitter::emitIns_R_C( // pcaddu12i reg, off-hi-20bits // load reg, offs_lo-12bits(reg) #when ins is load ins. // - // INS_OPTS_RC: ins == bl placeholders. 3-ins: ////TODO-LoongArch64: maybe optimize. + // INS_OPTS_RC: ins == bl placeholders. 3-ins: // TODO-LoongArch64: maybe optimize. // lu12i_w reg, addr-hi-20bits // ori reg, reg, addr-lo-12bits // lu32i_d reg, addr_hi-32bits @@ -1992,7 +1965,6 @@ void emitter::emitIns_R_C( id->idAddr()->iiaFieldHnd = fldHnd; - // dispIns(id);//loongarch dumping instr by other-fun. appendToCurIG(id); } @@ -2041,9 +2013,8 @@ void emitter::emitIns_R_AI(instruction ins, } id->idAddr()->iiaAddr = (BYTE*)addr; - id->idCodeSize(8); - // dispIns(id);//loongarch dumping instr by other-fun. 
+ appendToCurIG(id); } @@ -2113,7 +2084,6 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu } #endif // DEBUG - // dispIns(id); appendToCurIG(id); } @@ -2136,37 +2106,12 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) assert(instrCount != 0); assert(ins == INS_b); // when dst==nullptr, ins is INS_b by now. -#if 1 assert((-33554432 <= instrCount) && (instrCount < 33554432)); // 0x2000000. emitIns_I(ins, EA_PTRSIZE, instrCount << 2); // NOTE: instrCount is the number of the instructions. -#else - instrCount = instrCount << 2; - if ((-33554432 <= instrCount) && (instrCount < 33554432)) - { - /* This jump is really short */ - emitIns_I(ins, EA_PTRSIZE, instrCount); - } - else - { - // NOTE: should not be here !!! - assert(!"should not be here on LOONGARCH64 !!!"); - - // emitIns_I(INS_bl, EA_PTRSIZE, 4); - // ssize_t imm = ((ssize_t)instrCount>>12); - // assert(isValidSimm12(imm)); - // emitIns_R_I(INS_lu12i_w, EA_PTRSIZE, REG_R21, imm); - // imm = (instrCount & 0xfffff); - // emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_R21, REG_R21, imm); - - // emitIns_R_R_R(INS_add_d, EA_8BYTE, REG_R21, REG_R21, REG_RA); - // emitIns_R_R_I(INS_jirl, EA_PTRSIZE, REG_R0, REG_R21, 0); - } -#endif return; } - // (dst != nullptr) // // INS_OPTS_J: placeholders. 1-ins: if the dst outof-range will be replaced by INS_OPTS_JIRL. 
// bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst @@ -2210,7 +2155,7 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) #endif id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -2267,7 +2212,7 @@ void emitter::emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 #endif id->idCodeSize(4); - // dispIns(id); + appendToCurIG(id); } @@ -2337,7 +2282,6 @@ void emitter::emitIns_I_la(emitAttr size, regNumber reg, ssize_t imm) id->idAddr()->iiaAddr = (BYTE*)imm; - // dispIns(id); appendToCurIG(id); } @@ -2382,7 +2326,7 @@ void emitter::emitIns_Call(EmitCallType callType, assert(callType < EC_INDIR_R || addr == NULL); assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0)); - // ARM never uses these + // LoongArch64 never uses these assert(xreg == REG_NA && xmul == 0 && disp == 0); // Our stack level should be always greater than the bytes of arguments we push. Just @@ -2533,7 +2477,6 @@ void emitter::emitIns_Call(EmitCallType callType, } #endif // LATE_DISASM - // dispIns(id); appendToCurIG(id); } @@ -2588,7 +2531,9 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t if (id->idIsCallRegPtr()) { // EC_INDIR_R code = emitInsCode(id->idIns()); - D_INST_JIRL(code, id->idReg4(), id->idReg3(), 0); + code |= (code_t)id->idReg4(); + code |= (code_t)id->idReg3() << 5; + // the offset default is 0; } else if (id->idIsReloc()) { @@ -2600,11 +2545,13 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t *(code_t*)dst = 0x1e00000e; size_t addr = (size_t)(id->idAddr()->iiaAddr); // get addr. 
- // should assert(addr-dst < 38bits); int reg2 = (int)addr & 1; addr = addr ^ 1; + assert(isValidSimm38(addr - (ssize_t)dst)); + assert((addr & 3) == 0); + dst += 4; #ifdef DEBUG code = emitInsCode(INS_pcaddu18i); @@ -2619,37 +2566,42 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t } else { - // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. - // ori t2, t2, dst_offset_lo32-lo - // lu32i_d t2, dst_offset_hi32-lo - // jirl t2 + // lu12i_w t2, dst_offset_lo32-hi // TODO-LoongArch64: maybe optimize. + // ori t2, t2, dst_offset_lo32-lo + // lu32i_d t2, dst_offset_hi32-lo + // jirl t2 ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); - // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff. - assert((imm >> 32) == 0xff); // for LA64 addr-is 0xff. but this is not the best !!! + assert((imm >> 32) == 0xff); int reg2 = (int)(imm & 1); imm -= reg2; code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, REG_T2, imm >> 12); + code |= (code_t)REG_T2; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, REG_T2, REG_T2, imm); + code |= (code_t)REG_T2; + code |= (code_t)REG_T2 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; - // emitIns_R_I(INS_lu32i_d, size, REG_T2, imm >> 32); code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, REG_T2, imm >> 32); - D_INST_lu32i_d(code, REG_T2, 0xff); + code |= (code_t)REG_T2; + code |= 0xff << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_jirl); - D_INST_JIRL(code, reg2, REG_T2, 0); + code |= (code_t)reg2; + code |= (code_t)REG_T2 << 5; + // the offset default is 0; } dst += 4; @@ -2727,6 +2679,442 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t return callInstrSize; } +/***************************************************************************** + * LoongArch64 has an individual implementation for emitJumpDistBind(). 
+ * + * Bind targets of relative jumps/branch to choose the smallest possible encoding. + * LoongArch64 has a small medium, and large encoding. + * + * Even though the small encoding is offset-18bits which lowest 2bits is always 0. + * The small encoding as the default is fit for most cases. + */ + +void emitter::emitJumpDistBind() +{ +#ifdef DEBUG + if (emitComp->verbose) + { + printf("*************** In emitJumpDistBind()\n"); + } + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nInstruction list before jump distance binding:\n\n"); + emitDispIGlist(true); + } +#endif + + instrDescJmp* jmp; + + UNATIVE_OFFSET adjIG; + UNATIVE_OFFSET adjSJ; + insGroup* lstIG; +#ifdef DEBUG + insGroup* prologIG = emitPrologIG; +#endif // DEBUG + + // NOTE: + // bit0 of isLinkingEnd_LA: indicating whether updating the instrDescJmp's size with the type INS_OPTS_J; + // bit1 of isLinkingEnd_LA: indicating not needed updating the size while emitTotalCodeSize <= (0x7fff << 2) or had + // updated; + unsigned int isLinkingEnd_LA = emitTotalCodeSize <= (0x7fff << 2) ? 2 : 0; + + UNATIVE_OFFSET ssz = 0; // relative small jump's delay-slot. + // small jump max. neg distance + NATIVE_OFFSET nsd = B_DIST_SMALL_MAX_NEG; + // small jump max. pos distance + NATIVE_OFFSET psd = + B_DIST_SMALL_MAX_POS - + emitCounts_INS_OPTS_J * (3 << 2); // the max placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + +/*****************************************************************************/ +/* If the default small encoding is not enough, we start again here. 
*/ +/*****************************************************************************/ + +AGAIN: + +#ifdef DEBUG + emitCheckIGoffsets(); +#endif + +#ifdef DEBUG + insGroup* lastIG = nullptr; + instrDescJmp* lastSJ = nullptr; +#endif + + lstIG = nullptr; + adjSJ = 0; + adjIG = 0; + + for (jmp = emitJumpList; jmp; jmp = jmp->idjNext) + { + insGroup* jmpIG; + insGroup* tgtIG; + + UNATIVE_OFFSET jsz; // size of the jump instruction in bytes + + NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset? + UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump + UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative + // offset of the jump + UNATIVE_OFFSET dstOffs; + NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded + +/* Make sure the jumps are properly ordered */ + +#ifdef DEBUG + assert(lastSJ == nullptr || lastIG != jmp->idjIG || lastSJ->idjOffs < (jmp->idjOffs + adjSJ)); + lastSJ = (lastIG == jmp->idjIG) ? jmp : nullptr; + + assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG || + emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow + lastIG = jmp->idjIG; +#endif // DEBUG + + /* Get hold of the current jump size */ + + jsz = jmp->idCodeSize(); + + /* Get the group the jump is in */ + + jmpIG = jmp->idjIG; + + /* Are we in a group different from the previous jump? */ + + if (lstIG != jmpIG) + { + /* Were there any jumps before this one? 
*/ + + if (lstIG) + { + /* Adjust the offsets of the intervening blocks */ + + do + { + lstIG = lstIG->igNext; + assert(lstIG); +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + lstIG->igOffs += adjIG; + assert(IsCodeAligned(lstIG->igOffs)); + } while (lstIG != jmpIG); + } + + /* We've got the first jump in a new group */ + adjSJ = 0; + lstIG = jmpIG; + } + + /* Apply any local size adjustment to the jump's relative offset */ + jmp->idjOffs += adjSJ; + + // If this is a jump via register, the instruction size does not change, so we are done. + CLANG_FORMAT_COMMENT_ANCHOR; + + /* Have we bound this jump's target already? */ + + if (jmp->idIsBound()) + { + /* Does the jump already have the smallest size? */ + + if (jmp->idjShort) + { + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + continue; + } + + tgtIG = jmp->idAddr()->iiaIGlabel; + } + else + { + /* First time we've seen this label, convert its target */ + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Binding: "); + emitDispIns(jmp, false, false, false); + printf("Binding L_M%03u_" FMT_BB, emitComp->compMethodID, jmp->idAddr()->iiaBBlabel->bbNum); + } +#endif // DEBUG + + tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); + +#ifdef DEBUG + if (EMITVERBOSE) + { + if (tgtIG) + { + printf(" to %s\n", emitLabelString(tgtIG)); + } + else + { + printf("-- ERROR, no emitter cookie for " FMT_BB "; it is probably missing BBF_HAS_LABEL.\n", + jmp->idAddr()->iiaBBlabel->bbNum); + } + } + assert(tgtIG); +#endif // DEBUG + + /* Record the bound target */ + + jmp->idAddr()->iiaIGlabel = tgtIG; + jmp->idSetIsBound(); + } + + // We should not be jumping/branching across funclets/functions + emitCheckFuncletBranch(jmp, jmpIG); + + /* + In the following distance calculations, if we're not actually + 
scheduling the code (i.e. reordering instructions), we can + use the actual offset of the jump (rather than the beg/end of + the instruction group) since the jump will not be moved around + and thus its offset is accurate. + + First we need to figure out whether this jump is a forward or + backward one; to do this we simply look at the ordinals of the + group that contains the jump and the target. + */ + + srcInstrOffs = jmpIG->igOffs + jmp->idjOffs; + + /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */ + dstOffs = tgtIG->igOffs; + + srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch + + if (jmpIG->igNum < tgtIG->igNum) + { + /* Forward jump */ + + /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between + here and the target could be shortened, causing the actual distance to shrink. + */ + + dstOffs += adjIG; + + /* Compute the distance estimate */ + + jmpDist = dstOffs - srcEncodingOffs; + + /* How much beyond the max. short distance does the jump go? */ + + extra = jmpDist - psd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[1] Jump block is at %08X\n", jmpIG->igOffs); + printf("[1] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[1] Jump source is at %08X\n", srcEncodingOffs); + printf("[1] Label block is at %08X\n", dstOffs); + printf("[1] Jump dist. 
is %04X\n", jmpDist); + if (extra > 0) + { + printf("[1] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Forward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(jmpDist); + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). + adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + else + { + /* Backward jump */ + + /* Compute the distance estimate */ + + jmpDist = srcEncodingOffs - dstOffs; + + /* How much beyond the max. short distance does the jump go? 
*/ + + extra = jmpDist + nsd; + +#if DEBUG_EMIT + assert(jmp->idDebugOnlyInfo() != nullptr); + if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + if (INTERESTING_JUMP_NUM == 0) + { + printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum); + } + printf("[2] Jump block is at %08X\n", jmpIG->igOffs); + printf("[2] Jump reloffset is %04X\n", jmp->idjOffs); + printf("[2] Jump source is at %08X\n", srcEncodingOffs); + printf("[2] Label block is at %08X\n", dstOffs); + printf("[2] Jump dist. is %04X\n", jmpDist); + if (extra > 0) + { + printf("[2] Dist excess [S] = %d \n", extra); + } + } + if (EMITVERBOSE) + { + printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp), + jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist); + } +#endif // DEBUG_EMIT + + assert(jmpDist >= 0); // Backward jump + assert(!(jmpDist & 0x3)); + + if (isLinkingEnd_LA & 0x2) + { + jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! + } + else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J)) + { + instruction ins = jmp->idIns(); + assert((INS_bceqz <= ins) && (ins <= INS_bl)); + + if (ins < + INS_beqz) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu < beqz < bnez // See instrsloongarch64.h. + { + if ((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000) + { + extra = 4; + } + else + { + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + extra = 8; + } + } + else if (ins < INS_b) // beqz/bnez < b < bl // See instrsloongarch64.h. + { + if (jmpDist + emitCounts_INS_OPTS_J * 4 < 0x200000) + continue; + + extra = 4; + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + } + else + { + assert(ins == INS_b || ins == INS_bl); + assert((jmpDist + emitCounts_INS_OPTS_J * 4) < 0x8000000); + continue; + } + + jmp->idInsOpt(INS_OPTS_JIRL); + jmp->idCodeSize(jmp->idCodeSize() + extra); + jmpIG->igSize += (unsigned short)extra; // the placeholder sizeof(INS_OPTS_JIRL) - sizeof(INS_OPTS_J). 
+ adjSJ += (UNATIVE_OFFSET)extra; + adjIG += (UNATIVE_OFFSET)extra; + emitTotalCodeSize += (UNATIVE_OFFSET)extra; + jmpIG->igFlags |= IGF_UPD_ISZ; + isLinkingEnd_LA |= 0x1; + } + continue; + } + } // end for each jump + + if ((isLinkingEnd_LA & 0x3) < 0x2) + { + // indicating the instrDescJmp's size of the type INS_OPTS_J had updated + // after the first round and should iterate again to update. + isLinkingEnd_LA = 0x2; + + // Adjust offsets of any remaining blocks. + for (; lstIG;) + { + lstIG = lstIG->igNext; + if (!lstIG) + { + break; + } +#ifdef DEBUG + if (EMITVERBOSE) + { + printf("Adjusted offset of " FMT_BB " from %04X to %04X\n", lstIG->igNum, lstIG->igOffs, + lstIG->igOffs + adjIG); + } +#endif // DEBUG + + lstIG->igOffs += adjIG; + + assert(IsCodeAligned(lstIG->igOffs)); + } + goto AGAIN; + } + +#ifdef DEBUG + if (EMIT_INSTLIST_VERBOSE) + { + printf("\nLabels list after the jump dist binding:\n\n"); + emitDispIGlist(false); + } + + emitCheckIGoffsets(); +#endif // DEBUG +} + /***************************************************************************** * * Emit a 32-bit LOONGARCH64 instruction @@ -2831,77 +3219,96 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr); regNumber reg1 = id->idReg1(); - dst2 += 4; // assert(dst2 == dst); + dst2 += 4; switch (id->idCodeSize()) { - case 8: // if (id->idCodeSize() == 8) + case 8: { if (id->idReg2()) { // special for INT64_MAX or UINT32_MAX; code = emitInsCode(INS_addi_d); - // emitIns_R_R_I(INS_addi_d, size, reg, REG_R0, -1); - D_INST_2RI12(code, reg1, REG_R0, -1); + code |= (code_t)reg1; + code |= (code_t)REG_R0; + code |= 0xfff << 10; + *(code_t*)dst = code; dst += 4; ssize_t ui6 = (imm == INT64_MAX) ? 
1 : 32; code = emitInsCode(INS_srli_d); - // emitIns_R_R_I(INS_srli_d, size, reg, reg, ui6); code |= ((code_t)reg1 | ((code_t)reg1 << 5) | (ui6 << 10)); *(code_t*)dst = code; } else { code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; } break; } - case 12: // else if (id->idCodeSize() == 12) + case 12: { code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_lu32i_d); - // emitIns_R_I(INS_lu32i_d, size, reg, (imm>>32)); - D_INST_lu32i_d(code, reg1, imm >> 32); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 32) & 0xfffff) << 5; + *(code_t*)dst = code; break; } - case 16: // else if (id->idCodeSize() == 16) + case 16: { code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, reg1, imm >> 12); + code |= (code_t)reg1; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, reg1, imm); + code |= (code_t)reg1; + code |= (code_t)reg1 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_lu32i_d); - D_INST_lu32i_d(code, reg1, imm >> 32); + code |= (code_t)reg1; + code |= (code_t)(imm >> 32) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_lu52i_d); - D_INST_lu52i_d(code, reg1, reg1, imm >> 52); + code |= (code_t)reg1; + code |= (code_t)(reg1) << 5; + code |= ((code_t)(imm >> 52) & 0xfff) << 10; + 
*(code_t*)dst = code; break; @@ -2988,7 +3395,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else { code = emitInsCode(ins); - D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); // NOTE:here must be REG_R21 !!! + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; // NOTE:here must be REG_R21 !!! + code |= (code_t)(doff & 0xfff) << 10; *(code_t*)dst = code; } dst += 4; @@ -3003,23 +3412,27 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (ins == INS_bl) { assert((imm >> 32) == 0xff); - // assert((imm >> 32) <= 0x7ffff); doff = (int)imm >> 12; - D_INST_lu12i_w(code, REG_R21, doff); + code |= (code_t)REG_R21; + code |= ((code_t)doff & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, REG_R21, imm); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; dst2 = dst; ins = INS_lu32i_d; code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, reg1, imm >> 32); - D_INST_lu32i_d(code, reg1, 0xff); + code |= (code_t)reg1; + code |= 0xff << 5; + *(code_t*)dst = code; dst += 4; } @@ -3030,22 +3443,26 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) doff = (int)(imm & 0x7ff) - doff; // addr-lo-12bit. assert((imm >> 32) == 0xff); - // assert((imm >> 32) <= 0x7ffff); dataOffs = (unsigned)(imm >> 12); // addr-hi-20bits. 
- D_INST_lu12i_w(code, REG_R21, dataOffs); + code |= (code_t)REG_R21; + code |= ((code_t)dataOffs & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; - // emitIns_R_I(INS_lu32i_d, size, REG_R21, imm >> 32); code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, REG_R21, imm >> 32); - D_INST_lu32i_d(code, REG_R21, 0xff); + code |= (code_t)REG_R21; + code |= 0xff << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(ins); - D_INST_LS(code, (reg1 & 0x1f), REG_R21, doff); + code |= (code_t)(reg1 & 0x1f); + code |= (code_t)REG_R21 << 5; + code |= (code_t)(doff & 0xfff) << 10; + *(code_t*)dst = code; dst += 4; dst2 = dst; @@ -3101,25 +3518,28 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else { ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock; - // assert((imm >> 32) <= 0x7ffff);//In fact max is <= 0xffff assert((imm >> 32) == 0xff); code = emitInsCode(INS_lu12i_w); - D_INST_lu12i_w(code, REG_R21, imm >> 12); + code |= (code_t)REG_R21; + code |= ((code_t)(imm >> 12) & 0xfffff) << 5; + *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_ori); - D_INST_ori(code, reg1, REG_R21, imm); + code |= (code_t)reg1; + code |= (code_t)REG_R21 << 5; + code |= (code_t)(imm & 0xfff) << 10; *(code_t*)dst = code; dst += 4; dst2 = dst; - ins = INS_lu32i_d; - // emitIns_R_I(INS_lu32i_d, size, reg1, 0xff); + ins = INS_lu32i_d; code = emitInsCode(INS_lu32i_d); - // D_INST_lu32i_d(code, reg1, imm >> 32); - D_INST_lu32i_d(code, reg1, 0xff); + code |= (code_t)reg1; + code |= 0xff << 5; + *(code_t*)dst = code; } @@ -3155,6 +3575,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); imm -= 4; + assert((imm & 0x3) == 0); + ins = jmp->idIns(); assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JIRL: not used by now!!! 
switch (jmp->idCodeSize()) @@ -3163,22 +3585,28 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { regNumber reg2 = id->idReg2(); assert((INS_bceqz <= ins) && (ins <= INS_bgeu)); - // assert((INS_bceqz <= ins) && (ins <= INS_bl)); // TODO-LoongArch64 + if ((INS_beq == ins) || (INS_bne == ins)) { if ((-0x400000 <= imm) && (imm < 0x400000)) { code = emitInsCode(INS_xor); - D_INST_3R(code, REG_R21, reg1, reg2); + code |= (code_t)REG_R21; + code |= (code_t)reg1 << 5; + code |= (code_t)reg2 << 10; + *(code_t*)dst = code; dst += 4; code = emitInsCode(ins == INS_beq ? INS_beqz : INS_bnez); - D_INST_Bcond_Z(code, REG_R21, imm); + code |= (code_t)REG_R21 << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); + *(code_t*)dst = code; dst += 4; } - else // if ((-0x8000000 <= imm) && (imm < 0x8000000)) + else { assert((-0x8000000 <= imm) && (imm < 0x8000000)); assert((INS_bne & 0xfffe) == INS_beq); @@ -3191,12 +3619,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; code = emitInsCode(INS_b); - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + *(code_t*)dst = code; dst += 4; } - // else - // unreached(); } else if ((INS_bceqz == ins) || (INS_bcnez == ins)) { @@ -3204,13 +3632,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert((INS_bcnez & 0xfffe) == INS_bceqz); code = emitInsCode((instruction)((int)ins ^ 0x1)); - code |= ((code_t)reg1) << 5; /* rj */ + code |= ((code_t)reg1) << 5; code |= 0x800; *(code_t*)dst = code; dst += 4; code = emitInsCode(INS_b); - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; + *(code_t*)dst = code; dst += 4; } @@ -3228,13 +3658,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += 4; code = emitInsCode(INS_b); - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= 
((code_t)imm << 8) & 0x3fffc00; + *(code_t*)dst = code; dst += 4; } break; } - // case 12: + default: unreached(); break; @@ -3256,7 +3688,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) ins = id->idIns(); code = emitInsCode(ins); - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); + *(code_t*)dst = code; dst += 4; @@ -3267,31 +3702,39 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // bceqz/bcnez/beq/bne/blt/bltu/bge/bgeu/beqz/bnez/b/bl dst-relative. { ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. - assert(!(imm & 3)); + assert((imm & 3) == 0); ins = id->idIns(); code = emitInsCode(ins); if (ins == INS_b || ins == INS_bl) { - D_INST_B(code, imm); + code |= ((code_t)imm >> 18) & 0x3ff; + code |= ((code_t)imm << 8) & 0x3fffc00; } else if (ins == INS_bnez || ins == INS_beqz) { - D_INST_Bcond_Z(code, id->idReg1(), imm); + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); } else if (ins == INS_bcnez || ins == INS_bceqz) { assert((code_t)(id->idReg1()) < 8); // cc - D_INST_Bcond_Z(code, id->idReg1(), imm); + code |= (code_t)id->idReg1() << 5; + code |= (((code_t)imm << 8) & 0x3fffc00); + code |= (((code_t)imm >> 18) & 0x1f); } else if ((INS_beq <= ins) && (ins <= INS_bgeu)) { - D_INST_Bcond(code, id->idReg1(), id->idReg2(), imm); + code |= ((code_t)id->idReg1()) << 5; + code |= ((code_t)id->idReg2()); + code |= (((code_t)imm << 8) & 0x3fffc00); } else { assert(!"unimplemented on LOONGARCH yet"); } + *(code_t*)dst = code; dst += 4; @@ -3341,20 +3784,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { emitGCregDeadUpd(id->idReg1(), dst2); } - - // if (emitInsMayWriteMultipleRegs(id)) - //{ - // // INS_gslq etc... 
- // // "idReg2" is the secondary destination register - // if (id->idGCrefReg2() != GCT_NONE) - // { - // emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), *dp); - // } - // else - // { - // emitGCregDeadUpd(id->idReg2(), *dp); - // } - //} } // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC @@ -3455,6 +3884,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) #ifdef DEBUG +// clang-format off +static const char* const RegNames[] = +{ + #define REGDEF(name, rnum, mask, sname) sname, + #include "register.h" +}; +// clang-format on + /**************************************************************************** * * Display the given instruction. @@ -3472,13 +3909,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; } - // clang-format off - const char * const regName[] = {"zero", "ra", "tp", "sp", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "x0", "fp", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8"}; - - const char * const FregName[] = {"fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "ft8", "ft9", "ft10", "ft11", "ft12", "ft13", "ft14", "ft15", "fs0", "fs1", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7"}; - - const char * const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; - // clang-format on + const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; unsigned int opcode = (code >> 26) & 0x3f; @@ -3507,8 +3938,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_ADDU16I_D: // 0x4 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si16 = (code >> 10) & 0xffff; printf(" 0x%llx 
addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); return; @@ -3519,7 +3950,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-25,MSB7 unsigned int inscode = (code >> 25) & 0x7f; - const char* rd = regName[code & 0x1f]; + const char* rd = RegNames[code & 0x1f]; unsigned int si20 = (code >> 5) & 0xfffff; switch (inscode) { @@ -3554,8 +3985,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-24,MSB8 unsigned int inscode = (code >> 24) & 0xff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si14 = ((code >> 10) & 0x3fff) << 2; si14 >>= 2; switch (inscode) @@ -3594,9 +4025,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-24,MSB8 unsigned int inscode = (code >> 22) & 0x3ff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* fd = FregName[code & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; switch (inscode) @@ -3657,7 +4088,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_1RI21_BEQZ: // 0x10 { - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); @@ -3665,7 +4096,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_1RI21_BNEZ: // 0x11 { - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; printf(" 0x%llx bnez 
%s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); @@ -3697,8 +4128,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_JIRL: // 0x13 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; if (id->idDebugOnlyInfo()->idMemCookie) @@ -3730,8 +4161,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BEQ: // 0x16 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3739,8 +4170,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BNE: // 0x17 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3748,8 +4179,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BLT: // 0x18 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3757,8 +4188,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BGE: // 0x19 { - const char* rd = regName[code & 0x1f]; - const char* rj = 
regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3766,8 +4197,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BLTU: // 0x1a { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3775,8 +4206,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI16_BGEU: // 0x1b { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); @@ -3810,8 +4241,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits:31-10,MSB22 unsigned int inscode3 = (code >> 10) & 0x3fffff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; switch (inscode3) { case LA_2R_CLO_W: @@ -3895,15 +4326,15 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2R_ASRTLE_D: { - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); return; } case LA_2R_ASRTGT_D: { - const char* rj = regName[(code >> 5) & 
0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); return; } @@ -3917,9 +4348,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_ALSL_W // LA_OP_ALSL_WU - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; if (0 == ((code >> 17) & 0x1)) { @@ -3940,18 +4371,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BYTEPICK_W: // 0x2 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); return; } case LA_OP_BYTEPICK_D: // 0x3 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa3 = (code >> 15) & 0x7; printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); return; @@ -3965,9 +4396,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-15,MSB17 unsigned int inscode2 = (code >> 15) & 0x1ffff; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = 
RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; switch (inscode2) { @@ -4137,9 +4568,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_ALSL_D: // 0xb { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); return; @@ -4156,8 +4587,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_BSTRINS_W // LA_OP_BSTRPICK_W - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbw = (code >> 10) & 0x1f; unsigned int msbw = (code >> 16) & 0x1f; if (!(code & 0x8000)) @@ -4186,8 +4617,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_SLLI_W: // LA_OP_SLLI_D: - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4211,8 +4642,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_SRLI_W: // LA_OP_SRLI_D: - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4236,8 +4667,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_SRAI_W: // LA_OP_SRAI_D: - const 
char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4261,8 +4692,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // LA_OP_ROTRI_W: // LA_OP_ROTRI_D: - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; @@ -4292,8 +4723,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BSTRINS_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4301,8 +4732,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_OP_BSTRPICK_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); @@ -4312,11 +4743,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { // bits: 31-15,MSB17 unsigned int inscode1 = (code >> 15) & 0x1ffff; - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; 
+ const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; switch (inscode1) { @@ -4588,8 +5019,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_SLTI: // 0x8 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); @@ -4597,8 +5028,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_SLTUI: // 0x9 { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); @@ -4606,8 +5037,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_ADDI_W: // 0xa { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); @@ -4615,8 +5046,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_2RI12_ADDI_D: // 0xb { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); @@ -4624,32 +5055,32 @@ void emitter::emitDisInsName(code_t code, const BYTE* 
dst, instrDesc* id) } case LA_2RI12_LU52I_D: // 0xc { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int si12 = (code >> 10) & 0xfff; printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); return; } case LA_2RI12_ANDI: // 0xd { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } case LA_2RI12_ORI: // 0xe { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; } case LA_2RI12_XORI: // 0xf { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); return; @@ -4672,73 +5103,73 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { case LA_4R_FMADD_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMADD_D: { - const char* fd = FregName[code & 0x1f]; - 
const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FMSUB_D: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMADD_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case 
LA_4R_FNMADD_D: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_S: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } case LA_4R_FNMSUB_D: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; - const char* fa = FregName[(code >> 15) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; + const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); return; } @@ -4758,8 +5189,8 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; const char* cd = CFregName[code & 0x7]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; switch (cond) { case 0x0: @@ -4838,8 +5269,8 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) // bits:19-15,cond unsigned int cond = (code >> 15) & 0x1f; const char* cd = CFregName[code & 0x7]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; switch (cond) { case 0x0: @@ -4915,9 +5346,9 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_4R_FSEL: { - const char* fd = FregName[code & 0x1f]; - const char* fj = FregName[(code >> 5) & 0x1f]; - const char* fk = FregName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; + const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* ca = CFregName[(code >> 15) & 0x7]; printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); return; @@ -4935,89 +5366,89 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { case LA_3R_LDX_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* 
rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STX_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case 
LA_3R_LDX_BU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_HU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDX_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5026,321 +5457,321 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) return; case LA_3R_FLDX_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDX_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_S: { - const 
char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTX_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_AMSWAP_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 
0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 
0x1f]; printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_WU: { - const char* rd = regName[code & 
0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMSWAP_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd 
= RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMADD_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMAND_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code 
>> 10) & 0x1f]; printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMOR_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMXOR_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_W: 
{ - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMAX_DB_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_WU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_AMMIN_DB_DU: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const 
char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -5358,193 +5789,193 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) } case LA_3R_FLDGT_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDGT_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FLDLE_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const 
char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTGT_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstgt.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_S: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_FSTLE_D: { - const char* fd = FregName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* fd = RegNames[(code & 0x1f) + 32]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); return; } case LA_3R_LDGT_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = 
RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDGT_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_LDLE_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } 
case LA_3R_LDLE_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STGT_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_B: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) 
& 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_H: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_W: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); return; } case LA_3R_STLE_D: { - const char* rd = regName[code & 0x1f]; - const char* rj = regName[(code >> 5) & 0x1f]; - const char* rk = regName[(code >> 10) & 0x1f]; + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + const char* rk = RegNames[(code >> 10) & 0x1f]; printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); return; } @@ -6314,13 +6745,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins //------------------------------------------------------------------------ // emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. // +// TODO-LoongArch64: supporting SIMD. // Arguments: -// reg - A general-purpose register or SIMD and floating-point register. -// size - A register size. +// reg - A general-purpose register orfloating-point register. +// size - unused parameter. // varName - unused parameter. 
// // Return value: -// A string that represents a general-purpose register name or SIMD and floating-point scalar register name. +// A string that represents a general-purpose register name or floating-point scalar register name. // const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) { @@ -6362,70 +6794,4 @@ bool emitter::IsMovInstruction(instruction ins) } } } - -//---------------------------------------------------------------------------------------- -// IsRedundantMov: -// Check if the current `mov` instruction is redundant and can be omitted. -// A `mov` is redundant in following 3 cases: -// -// 1. Move to same register -// (Except 4-byte movement like "mov w1, w1" which zeros out upper bits of x1 register) -// -// mov Rx, Rx -// -// 2. Move that is identical to last instruction emitted. -// -// mov Rx, Ry # <-- last instruction -// mov Rx, Ry # <-- current instruction can be omitted. -// -// 3. Opposite Move as that of last instruction emitted. -// -// mov Rx, Ry # <-- last instruction -// mov Ry, Rx # <-- current instruction can be omitted. -// -// Arguments: -// ins - The current instruction -// size - Operand size of current instruction -// dst - The current destination -// src - The current source -// canSkip - The move can be skipped as it doesn't represent special semantics -// -// Return Value: -// true if previous instruction moved from current dst to src. - -bool emitter::IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip) -{ - NYI_LOONGARCH64("IsRedundantMov-----unimplemented on LOONGARCH64 yet----"); - return false; -} - -//---------------------------------------------------------------------------------------- -// IsRedundantLdStr: -// For ldr/str pair next to each other, check if the current load or store is needed or is -// the value already present as of previous instruction. 
-// -// ldr x1, [x2, #56] -// str x1, [x2, #56] <-- redundant -// -// OR -// -// str x1, [x2, #56] -// ldr x1, [x2, #56] <-- redundant - -// Arguments: -// ins - The current instruction -// dst - The current destination -// src - The current source -// imm - Immediate offset -// size - Operand size -// fmt - Format of instruction -// Return Value: -// true if previous instruction already has desired value in register/memory location. - -bool emitter::IsRedundantLdStr( - instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt) -{ - NYI_LOONGARCH64("IsRedundantLdStr-----unimplemented on LOONGARCH64 yet----"); - return false; -} #endif // defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index cbeb66f7ded82..9b79d544a9a3a 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -88,12 +88,30 @@ static bool isValidSimm12(ssize_t value) return -(((int)1) << 11) <= value && value < (((int)1) << 11); }; +// Returns true if 'value' is a legal unsigned immediate 12 bit encoding. +static bool isValidUimm12(ssize_t value) +{ + return (0 == (value >> 12)); +} + +// Returns true if 'value' is a legal unsigned immediate 11 bit encoding. +static bool isValidUimm11(ssize_t value) +{ + return (0 == (value >> 11)); +} + // Returns true if 'value' is a legal signed immediate 20 bit encoding. static bool isValidSimm20(ssize_t value) { return -(((int)1) << 19) <= value && value < (((int)1) << 19); }; +// Returns true if 'value' is a legal signed immediate 38 bit encoding. +static bool isValidSimm38(ssize_t value) +{ + return -(((ssize_t)1) << 37) <= value && value < (((ssize_t)1) << 37); +}; + // Returns the number of bits used by the given 'size'. 
inline static unsigned getBitWidth(emitAttr size) { diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index fa2a28179dd48..826d89dd2a491 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1203,7 +1203,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { // TODO-LoongArch64: For larger block sizes CodeGen can choose to use 16-byte SIMD instructions. // here just used a temp register. - buildInternalFloatRegisterDefForNode(blkNode); + buildInternalIntRegisterDefForNode(blkNode); } } break; @@ -1260,27 +1260,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) switch (blkNode->gtBlkOpKind) { case GenTreeBlk::BlkOpKindUnroll: - { buildInternalIntRegisterDefForNode(blkNode); - - const bool isSrcAddrLocal = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) || - ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr()); - const bool isDstAddrLocal = dstAddr->OperIsLocalAddr(); - - // TODO-LoongArch64: using 16-byte SIMD instructions. - const bool srcAddrMayNeedReg = - isSrcAddrLocal || ((srcAddrOrFill != nullptr) && srcAddrOrFill->isContained()); - const bool dstAddrMayNeedReg = isDstAddrLocal || dstAddr->isContained(); - - // The following allocates an additional integer register in a case - // when a load instruction and a store instruction cannot be encoded using offset - // from a corresponding base register. - if (srcAddrMayNeedReg && dstAddrMayNeedReg) - { - buildInternalIntRegisterDefForNode(blkNode); - } - } - break; + break; case GenTreeBlk::BlkOpKindHelper: dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/registerloongarch64.h b/src/coreclr/jit/registerloongarch64.h index b58b7757b41d2..8f3cd157016bb 100644 --- a/src/coreclr/jit/registerloongarch64.h +++ b/src/coreclr/jit/registerloongarch64.h @@ -51,7 +51,7 @@ REGDEF(S8, 31, 0x80000000, "s8" ) //NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved !!! 
-// It can be used only by manully and should be very careful!!! +// It should be only used with hand written assembly code and should be very careful!!! // e.g. right now LoongArch64's backend-codegen/emit, there is usually // a need for an extra register for cases like // constructing a large imm or offset, saving some intermediate result From 5e84a3ecc85a1358e670c699e126ca33286ea7b0 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 18 Mar 2022 09:13:43 +0800 Subject: [PATCH 36/46] [LoongArch64] amend some code for CR round2. --- src/coreclr/jit/codegen.h | 6 - src/coreclr/jit/codegencommon.cpp | 10 +- src/coreclr/jit/codegeninterface.h | 5 - src/coreclr/jit/codegenloongarch64.cpp | 415 +++-------- src/coreclr/jit/compiler.cpp | 12 - src/coreclr/jit/compiler.h | 7 - src/coreclr/jit/emit.cpp | 16 +- src/coreclr/jit/emit.h | 8 +- src/coreclr/jit/emitloongarch64.cpp | 4 +- src/coreclr/jit/gentree.cpp | 7 - src/coreclr/jit/instr.cpp | 5 +- src/coreclr/jit/instr.h | 2 +- src/coreclr/jit/instrsloongarch64.h | 780 ++++++++++---------- src/coreclr/jit/jitconfigvalues.h | 7 - src/coreclr/jit/lclvars.cpp | 213 +----- src/coreclr/jit/lower.cpp | 29 +- src/coreclr/jit/lowerloongarch64.cpp | 49 +- src/coreclr/jit/morph.cpp | 83 +-- src/coreclr/jit/register_arg_convention.cpp | 2 +- src/coreclr/jit/target.h | 23 +- src/coreclr/jit/targetloongarch64.h | 2 +- src/coreclr/jit/unwind.h | 9 +- 22 files changed, 588 insertions(+), 1106 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 89d8ba379b124..baa48b722454b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -644,12 +644,6 @@ class CodeGen final : public CodeGenInterface bool genSaveFpLrWithAllCalleeSavedRegisters; #endif // TARGET_ARM64 -#ifdef TARGET_LOONGARCH64 - virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value); - virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const; - bool genSaveFpRaWithAllCalleeSavedRegisters; -#endif // 
TARGET_LOONGARCH64 - //------------------------------------------------------------------------- // // End prolog/epilog generation diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e69b2cc38aa6d..0a773b0f1ad47 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -127,9 +127,9 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) /* Assume that we not fully interruptible */ SetInterruptible(false); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) SetHasTailCalls(false); -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 #ifdef DEBUG genInterruptibleUsed = false; genCurDispOffset = (unsigned)-1; @@ -138,10 +138,6 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) #ifdef TARGET_ARM64 genSaveFpLrWithAllCalleeSavedRegisters = false; #endif // TARGET_ARM64 - -#ifdef TARGET_LOONGARCH64 - genSaveFpRaWithAllCalleeSavedRegisters = false; -#endif // TARGET_LOONGARCH64 } void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg) @@ -4298,7 +4294,7 @@ void CodeGen::genEnregisterIncomingStackArgs() bool FPbased; int base = compiler->lvaFrameAddress(varNum, &FPbased); - if ((-2048 <= base) && (base < 2048)) + if (emitter::isValidSimm12(base)) { GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index f692193104f12..dbd53ffbad46f 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -196,11 +196,6 @@ class CodeGenInterface virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const = 0; #endif // TARGET_ARM64 -#ifdef TARGET_LOONGARCH64 - virtual void SetSaveFpRaWithAllCalleeSavedRegisters(bool value) = 0; - virtual bool IsSaveFpRaWithAllCalleeSavedRegisters() const = 0; -#endif // TARGET_LOONGARCH64 - regNumber 
genGetThisArgReg(GenTreeCall* call) const; #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 260b54ee15000..b6a7442c19dce 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1053,18 +1053,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) 0); } else if (genFuncletInfo.fiFrameType == 2) - { - // fiFrameType constraints: - assert(frameSize < 0); - assert(frameSize >= -2048); - - assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); - genStackPointerAdjustment(frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, - 0); - } - else if (genFuncletInfo.fiFrameType == 3) { // fiFrameType constraints: assert(frameSize < -2048); @@ -1088,21 +1076,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); } - else if (genFuncletInfo.fiFrameType == 4) - { - // fiFrameType constraints: - assert(frameSize < -2048); - - offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); - int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; - - genStackPointerAdjustment(-SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - - genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0); - - genStackPointerAdjustment(frameSize + SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - } else { unreached(); @@ -1214,17 +1187,6 @@ void CodeGen::genFuncletEpilog() genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); } else if (genFuncletInfo.fiFrameType == 2) - { - // fiFrameType constraints: - assert(frameSize >= -2048); - assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040); - - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, 
genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0); - - // generate daddiu SP,SP,imm - genStackPointerAdjustment(-frameSize, REG_R21, nullptr, /* reportUnwindData */ true); - } - else if (genFuncletInfo.fiFrameType == 3) { // fiFrameType constraints: assert(frameSize < -2048); @@ -1251,21 +1213,6 @@ void CodeGen::genFuncletEpilog() // second, generate daddiu SP,SP,imm for remaine space. genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); } - else if (genFuncletInfo.fiFrameType == 4) - { - // fiFrameType constraints: - assert(frameSize < -2048); - - int offset = -frameSize - (genFuncletInfo.fiSP_to_PSP_slot_delta + 8); - int SP_delta = roundUp((UINT)offset, STACK_ALIGN); - offset = SP_delta - offset; - - genStackPointerAdjustment(-frameSize - SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - - genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset, 0); - - genStackPointerAdjustment(SP_delta, REG_R21, nullptr, /* reportUnwindData */ true); - } else { unreached(); @@ -1309,16 +1256,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1)); - unsigned saveRegsPlusPSPSize; - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - saveRegsPlusPSPSize = - roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize /* -2*8*/; - } - else - { - saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; - } + unsigned saveRegsPlusPSPSize = + roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; if (compiler->info.compIsVarArgs) { @@ -1345,32 +1284,16 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if (maxFuncletFrameSizeAligned <= (2048 - 8)) { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - genFuncletInfo.fiFrameType = 1; - saveRegsPlusPSPSize -= 2 * 
8; // FP/RA - } - else - { - genFuncletInfo.fiFrameType = 2; - SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; - } + genFuncletInfo.fiFrameType = 1; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } else { unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - genFuncletInfo.fiFrameType = 3; - saveRegsPlusPSPSize -= 2 * 8; // FP/RA - } - else - { - genFuncletInfo.fiFrameType = 4; - SP_to_FPRA_save_delta += REGSIZE_BYTES + PSPSize; - } + genFuncletInfo.fiFrameType = 2; + saveRegsPlusPSPSize -= 2 * 8; // FP/RA } int CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize; @@ -4903,16 +4826,7 @@ int CodeGenInterface::genSPtoFPdelta() const { assert(isFramePointerUsed()); - int delta; - if (IsSaveFpRaWithAllCalleeSavedRegisters()) - { - delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) - - (compiler->compCalleeRegsPushed - 1) * REGSIZE_BYTES; - } - else - { - delta = compiler->lvaOutgoingArgSpaceSize; - } + int delta = compiler->lvaOutgoingArgSpaceSize; assert(delta >= 0); return delta; @@ -4975,23 +4889,6 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const return callerSPtoSPdelta; } -//--------------------------------------------------------------------- -// SetSaveFpRaWithAllCalleeSavedRegisters - Set the variable that indicates if FP/RA registers -// are stored with the rest of the callee-saved registers. -void CodeGen::SetSaveFpRaWithAllCalleeSavedRegisters(bool value) -{ - JITDUMP("Setting genSaveFpRaWithAllCalleeSavedRegisters to %s\n", dspBool(value)); - genSaveFpRaWithAllCalleeSavedRegisters = value; -} - -//--------------------------------------------------------------------- -// IsSaveFpRaWithAllCalleeSavedRegisters - Return the value that indicates where FP/RA registers -// are stored in the prolog. 
-bool CodeGen::IsSaveFpRaWithAllCalleeSavedRegisters() const -{ - return genSaveFpRaWithAllCalleeSavedRegisters; -} - /***************************************************************************** * Emit a call to a helper function. */ @@ -9031,79 +8928,48 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize); compiler->unwindAllocStack(totalFrameSize); - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - // Case #1. - // - // Generate: - // daddiu sp, sp, -framesz - // sd fp, outsz(sp) - // sd ra, outsz+8(sp) - // - // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. - // - // After saving callee-saved registers, we establish the frame pointer with: - // daddiu fp, sp, offset-fp - // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - - JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - - frameType = 1; - - offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; + // Case #1. + // + // Generate: + // daddiu sp, sp, -framesz + // sd fp, outsz(sp) + // sd ra, outsz+8(sp) + // + // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld. + // + // After saving callee-saved registers, we establish the frame pointer with: + // daddiu fp, sp, offset-fp + // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match. - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); - compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); + JITDUMP("Frame type 1. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); - compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); + frameType = 1; - maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; - offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA - } - else - { - frameType = 2; + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp); + compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp); - offsetSpToSavedFp = genSPtoFPdelta(); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8); + compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8); - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, fpDelta:%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, - offsetSpToSavedFp); + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - offset = compiler->compLclFrameSize; - } + offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA } else { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 3. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - - frameType = 3; + JITDUMP("Frame type 2. 
#outsz=%d; #framesz=%d; LclFrameSize=%d\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize); - maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA + frameType = 2; - offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - } - else - { - frameType = 4; + maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; LclFrameSize=%d, SPDelta-1:%d\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize, - calleeSaveSPDelta); - - offset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - offset = calleeSaveSPDelta - offset; - offsetSpToSavedFp = offset + REGSIZE_BYTES; - } + offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + offset = calleeSaveSPDelta - offset; } } else @@ -9124,28 +8990,14 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta); genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta); -// For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, -// so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't -// need to add codes at all. - -// if (compiler->info.compIsVarArgs) -//{ -// JITDUMP(" compIsVarArgs=true\n"); - -// // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here. 
-// assert((offset % 16) == 0); -// for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1))) -// { -// regNumber reg2 = REG_NEXT(reg1); -// // sd REG, offset(SP) -// // sd REG + 1, (offset+8)(SP) -// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg1, REG_SPBASE, offset); -// compiler->unwindNop(); -// GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, reg2, REG_SPBASE, offset + 8); -// compiler->unwindNop(); -// offset += 2 * REGSIZE_BYTES; -// } -//} + // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here, + // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't + // need to add codes at all. + if (compiler->info.compIsVarArgs) + { + JITDUMP(" compIsVarArgs=true\n"); + NYI_LOONGARCH64("genPushCalleeSavedRegisters - compIsVarArgs"); + } #ifdef DEBUG if (compiler->opts.disAsm) @@ -9158,10 +9010,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe // offsetSpToSavedFp = genSPtoFPdelta(); } else if (frameType == 2) - { - // offsetSpToSavedFp = genSPtoFPdelta(); - } - else if (frameType == 3) { if (compiler->lvaOutgoingArgSpaceSize >= 2040) { @@ -9202,19 +9050,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe establishFramePointer = false; } - else if (frameType == 4) - { - genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true); - - establishFramePointer = false; - - int remainingFrameSz = totalFrameSize - calleeSaveSPDelta; - - if (remainingFrameSz > 0) - { - genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true); - } - } else { unreached(); @@ -9261,131 +9096,84 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta); } - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); + JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed)); - frameType = 1; + frameType = 1; - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; - } - else - { - frameType = 2; - - calleeSaveSPOffset = compiler->compLclFrameSize; - - JITDUMP("Frame type 2(Top). #outsz=%d; #framesz=%d; localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, - dspBool(compiler->compLocallocUsed)); - } - // calleeSaveSPDelta = 0; + calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; } else { - if (!IsSaveFpRaWithAllCalleeSavedRegisters()) - { - JITDUMP("Frame type 3(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " - "localloc? %s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - frameType = 3; - - int outSzAligned; - if (compiler->lvaOutgoingArgSpaceSize >= 2040) - { - int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - offset; + JITDUMP("Frame type 2(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " + "localloc? 
%s\n", + unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, + dspBool(compiler->compLocallocUsed)); - int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; - calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); - offset2 = calleeSaveSPDelta - offset2; + frameType = 2; - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } - else - { - outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; - genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); - } - - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); - - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); + int outSzAligned; + if (compiler->lvaOutgoingArgSpaceSize >= 2040) + { + int offset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - offset; - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + int offset2 = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize; + calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN); + offset2 = calleeSaveSPDelta - offset2; - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + if (compiler->compLocallocUsed) + { + // Restore sp from fp + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); } else { - int offset2 = 
compiler->lvaOutgoingArgSpaceSize; - if (compiler->compLocallocUsed) - { - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); - compiler->unwindSetFrameReg(REG_FPBASE, offset2); - } + outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf; + genStackPointerAdjustment(outSzAligned, REG_R21, nullptr, /* reportUnwindData */ true); + } - regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); - compiler->unwindSaveReg(REG_RA, offset2 + 8); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); - GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); - compiler->unwindSaveReg(REG_FP, offset2); + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, - /* reportUnwindData */ true); - } + calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); } else { - frameType = 4; - - JITDUMP("Frame type 4(Top). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; localloc? 
%s\n", - unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed, - dspBool(compiler->compLocallocUsed)); - - calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); - calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; - + int offset2 = compiler->lvaOutgoingArgSpaceSize; if (compiler->compLocallocUsed) { - calleeSaveSPDelta = calleeSaveSPOffset + REGSIZE_BYTES; - // Restore sp from fp - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -calleeSaveSPDelta); - compiler->unwindSetFrameReg(REG_FPBASE, calleeSaveSPDelta); - } - else - { - calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta; - genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2); + compiler->unwindSetFrameReg(REG_FPBASE, offset2); } - calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize; - calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN); + regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end. 
+ + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8); + compiler->unwindSaveReg(REG_RA, offset2 + 8); + + GetEmitter()->emitIns_R_R_I(INS_ld_d, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2); + compiler->unwindSaveReg(REG_FP, offset2); + + calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; + calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN); + calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset; + + genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, REG_R21, nullptr, + /* reportUnwindData */ true); } } } @@ -9414,16 +9202,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) } else if (frameType == 2) { - GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); - compiler->unwindAllocStack(totalFrameSize); - } - else if (frameType == 3) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); - } - else if (frameType == 4) - { - // genStackPointerAdjustment(calleeSaveSPDelta, REG_R21, nullptr, /* reportUnwindData */ true); + // The stack pointer has already been restored above; nothing more to do here. } else { diff --git a/src/coreclr/jit/compiler.cpp index 49a0005a810e7..be5c3e5a41c43 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2858,11 +2858,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = 0; #endif // defined(TARGET_ARM64)
- opts.compJitSaveFpRaWithCalleeSavedRegisters = 0; -#endif // defined(TARGET_LOONGARCH64) - #ifdef DEBUG opts.dspInstrs = false; opts.dspLines = false; @@ -3361,13 +3356,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters(); } #endif // defined(DEBUG) && defined(TARGET_ARM64) - -#if defined(DEBUG) && defined(TARGET_LOONGARCH64) - if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash())) - { - opts.compJitSaveFpRaWithCalleeSavedRegisters = JitConfig.JitSaveFpRaWithCalleeSavedRegisters(); - } -#endif // defined(DEBUG) && defined(TARGET_LOONGARCH64) } #ifdef DEBUG diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e0aac6909a6c0..328b4fff82553 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10007,13 +10007,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX int compJitSaveFpLrWithCalleeSavedRegisters; #endif // defined(TARGET_ARM64) -#if defined(TARGET_LOONGARCH64) - // Decision about whether to save FP/RA registers with callee-saved registers (see - // COMPlus_JitSaveFpRaWithCalleSavedRegisters). - // TODO: will delete this in future. 
- int compJitSaveFpRaWithCalleeSavedRegisters; -#endif // defined(TARGET_LOONGARCH64) - #ifdef CONFIGURABLE_ARM_ABI bool compUseSoftFP = false; #else diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 55d517f996d1d..c8f0b0701a70a 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6617,10 +6617,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, ig->igSize = (unsigned short)(cp - bp); } -#ifdef TARGET_LOONGARCH64 - unsigned actualCodeSize = (unsigned)(cp - codeBlock); -#endif - #if EMIT_TRACK_STACK_DEPTH assert(emitCurStackLvl == 0); #endif @@ -6661,7 +6657,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, emitUpdateLiveGCregs(GCT_GCREF, RBM_NONE, cp); } -#ifndef TARGET_LOONGARCH64 /* Patch any forward jumps */ if (emitFwdJumps) @@ -6726,6 +6721,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARM64) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. + unreached(); #else #error Unsupported or unset target architecture #endif @@ -6739,6 +6737,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, #elif defined(TARGET_ARMARCH) assert(!jmp->idAddr()->iiaHasInstrCount()); emitOutputLJ(NULL, adr, jmp); +#elif defined(TARGET_LOONGARCH64) + // For LoongArch64 `emitFwdJumps` is always false. + unreached(); #else #error Unsupported or unset target architecture #endif @@ -6746,7 +6747,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } -#endif //! 
TARGET_LOONGARCH64 #ifdef DEBUG if (emitComp->opts.disAsm) @@ -6755,9 +6755,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } #endif -#ifndef TARGET_LOONGARCH64 unsigned actualCodeSize = emitCurCodeOffs(cp); -#endif #if defined(TARGET_ARM64) assert(emitTotalCodeSize == actualCodeSize); @@ -6848,7 +6846,6 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ -#ifndef TARGET_LOONGARCH64 unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) { instrDesc* id = (instrDesc*)ig->igData; @@ -6877,7 +6874,6 @@ unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) assert(!"emitFindInsNum failed"); return -1; } -#endif /***************************************************************************** * diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index c64a67192b645..60971839bc507 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1790,13 +1790,11 @@ class emitter #endif // FEATURE_EH_FUNCLETS -/************************************************************************/ -/* Methods to record a code position and later convert to offset */ -/************************************************************************/ + /************************************************************************/ + /* Methods to record a code position and later convert to offset */ + /************************************************************************/ -#ifndef TARGET_LOONGARCH64 unsigned emitFindInsNum(insGroup* ig, instrDesc* id); -#endif UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum); /************************************************************************/ diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index c7ba9f4a55ffe..098227a7ce230 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -500,7 +500,7 @@ bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) // 
clang-format off /*static*/ const BYTE CodeGenInterface::instInfo[] = { - #define INST(id, nm, fp, info, fmt, e1) info, + #define INST(id, nm, info, e1) info, #include "instrs.h" }; // clang-format on @@ -556,7 +556,7 @@ inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) // clang-format off const static code_t insCode[] = { - #define INST(id, nm, fp, info, fmt, e1) e1, + #define INST(id, nm, info, e1) e1, #include "instrs.h" }; // clang-format on diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2b305d018d86e..18b5fee55e7cc 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6108,13 +6108,6 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) switch (type) { case TYP_INT: -#ifdef TARGET_LOONGARCH64 - case TYP_UINT: -// For LoongArch64, the register $r0 is always const-zero with 64bits-width. -// Besides the instructions's operation of the 64bits and 32bits using the whole -// 64bits-width register which is unlike the AMD64 and ARM64. -// So for UINT type, LoongArch64 can't share with INT liking AMD64 and ARM64. -#endif zero = gtNewIconNode(0); break; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index bae791f106393..6406ceb3715da 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -67,7 +67,7 @@ const char* CodeGen::genInsName(instruction ins) #include "instrs.h" #elif defined(TARGET_LOONGARCH64) - #define INST(id, nm, fp, ldst, fmt, e1) nm, + #define INST(id, nm, ldst, e1) nm, #include "instrs.h" #else @@ -425,7 +425,8 @@ void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr s } #ifdef TARGET_LOONGARCH64 - NYI_LOONGARCH64("inst_RV-----unimplemented/unused on LOONGARCH64 yet----"); + // inst_RV is not used for LoongArch64, so there is no need to define `emitIns_R`. 
+ NYI_LOONGARCH64("inst_RV-----unused on LOONGARCH64----"); #else GetEmitter()->emitIns_R(ins, size, reg); #endif diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 92b1c6f63f065..a01492d08b8a9 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -52,7 +52,7 @@ enum instruction : unsigned INS_lea, // Not a real instruction. It is used for load the address of stack locals #elif defined(TARGET_LOONGARCH64) - #define INST(id, nm, fp, ldst, fmt, e1) INS_##id, + #define INST(id, nm, ldst, e1) INS_##id, #include "instrs.h" INS_lea, // Not a real instruction. It is used for load the address of stack locals diff --git a/src/coreclr/jit/instrsloongarch64.h b/src/coreclr/jit/instrsloongarch64.h index e3da6728fd982..ada87672e397a 100644 --- a/src/coreclr/jit/instrsloongarch64.h +++ b/src/coreclr/jit/instrsloongarch64.h @@ -6,9 +6,7 @@ * * id -- the enum name for the instruction * nm -- textual name (for assembly dipslay) - * fp -- floating point instruction * ld/st/cmp -- load/store/compare instruction - * fmt -- encoding format used by this instruction * encode -- encoding 1 * ******************************************************************************/ @@ -31,32 +29,32 @@ // emitInsMayWriteMultipleRegs in emitLoongarch64.cpp. // clang-format off -INST(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE) -INST(nop , "nop", 0, 0, IF_LA, 0x03400000) +INST(invalid, "INVALID", 0, BAD_CODE) +INST(nop , "nop", 0, 0x03400000) // INS_bceqz/INS_beq/INS_blt/INS_bltu must be even number. 
-INST(bceqz, "bceqz", 0, 0, IF_LA, 0x48000000) -INST(bcnez, "bcnez", 0, 0, IF_LA, 0x48000100) +INST(bceqz, "bceqz", 0, 0x48000000) +INST(bcnez, "bcnez", 0, 0x48000100) -INST(beq, "beq", 0, 0, IF_LA, 0x58000000) -INST(bne, "bne", 0, 0, IF_LA, 0x5c000000) +INST(beq, "beq", 0, 0x58000000) +INST(bne, "bne", 0, 0x5c000000) -INST(blt, "blt", 0, 0, IF_LA, 0x60000000) -INST(bge, "bge", 0, 0, IF_LA, 0x64000000) -INST(bltu, "bltu", 0, 0, IF_LA, 0x68000000) -INST(bgeu, "bgeu", 0, 0, IF_LA, 0x6c000000) +INST(blt, "blt", 0, 0x60000000) +INST(bge, "bge", 0, 0x64000000) +INST(bltu, "bltu", 0, 0x68000000) +INST(bgeu, "bgeu", 0, 0x6c000000) ////R_I. -INST(beqz, "beqz", 0, 0, IF_LA, 0x40000000) -INST(bnez, "bnez", 0, 0, IF_LA, 0x44000000) +INST(beqz, "beqz", 0, 0x40000000) +INST(bnez, "bnez", 0, 0x44000000) ////I. -INST(b, "b", 0, 0, IF_LA, 0x50000000) -INST(bl, "bl", 0, 0, IF_LA, 0x54000000) +INST(b, "b", 0, 0x50000000) +INST(bl, "bl", 0, 0x54000000) /////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: Begin -//// the fllowing instructions will be used by emitter::emitInsMayWriteToGCReg(). +//// the following instructions will be used by emitter::emitInsMayWriteToGCReg(). //////////////////////////////////////////////// // enum name FP LD/ST FMT ENCODE // @@ -64,214 +62,214 @@ INST(bl, "bl", 0, 0, IF_LA, 0x54000000) /////////////////////////////////////////////////////////////////////////////////////////// // mov rd,rj // In fact, mov is an alias instruction, "ori rd,rj,0" -INST(mov, "mov", 0, 0, IF_LA, 0x03800000) +INST(mov, "mov", 0, 0x03800000) //dneg is a alias instruction. //sub_d rd, zero, rk -INST(dneg, "dneg", 0, 0, IF_LA, 0x00118000) +INST(dneg, "dneg", 0, 0x00118000) //neg is a alias instruction. //sub_w rd, zero, rk -INST(neg, "neg", 0, 0, IF_LA, 0x00110000) +INST(neg, "neg", 0, 0x00110000) //not is a alias instruction. 
//nor rd, rj, zero -INST(not, "not", 0, 0, IF_LA, 0x00140000) +INST(not, "not", 0, 0x00140000) // enum:id name FP LD/ST Formate ENCODE ////R_R_R. -INST(add_w, "add.w", 0, 0, IF_LA, 0x00100000) -INST(add_d, "add.d", 0, 0, IF_LA, 0x00108000) -INST(sub_w, "sub.w", 0, 0, IF_LA, 0x00110000) -INST(sub_d, "sub.d", 0, 0, IF_LA, 0x00118000) - -INST(and, "and", 0, 0, IF_LA, 0x00148000) -INST(or, "or", 0, 0, IF_LA, 0x00150000) -INST(nor, "nor", 0, 0, IF_LA, 0x00140000) -INST(xor, "xor", 0, 0, IF_LA, 0x00158000) -INST(andn, "andn", 0, 0, IF_LA, 0x00168000) -INST(orn, "orn", 0, 0, IF_LA, 0x00160000) - -INST(mul_w, "mul.w", 0, 0, IF_LA, 0x001c0000) -INST(mul_d, "mul.d", 0, 0, IF_LA, 0x001d8000) -INST(mulh_w, "mulh.w", 0, 0, IF_LA, 0x001c8000) -INST(mulh_wu, "mulh.wu", 0, 0, IF_LA, 0x001d0000) -INST(mulh_d, "mulh.d", 0, 0, IF_LA, 0x001e0000) -INST(mulh_du, "mulh.du", 0, 0, IF_LA, 0x001e8000) -INST(mulw_d_w, "mulw.d.w", 0, 0, IF_LA, 0x001f0000) -INST(mulw_d_wu, "mulw.d.wu", 0, 0, IF_LA, 0x001f8000) -INST(div_w, "div.w", 0, 0, IF_LA, 0x00200000) -INST(div_wu, "div.wu", 0, 0, IF_LA, 0x00210000) -INST(div_d, "div.d", 0, 0, IF_LA, 0x00220000) -INST(div_du, "div.du", 0, 0, IF_LA, 0x00230000) -INST(mod_w, "mod.w", 0, 0, IF_LA, 0x00208000) -INST(mod_wu, "mod.wu", 0, 0, IF_LA, 0x00218000) -INST(mod_d, "mod.d", 0, 0, IF_LA, 0x00228000) -INST(mod_du, "mod.du", 0, 0, IF_LA, 0x00238000) - -INST(sll_w, "sll.w", 0, 0, IF_LA, 0x00170000) -INST(srl_w, "srl.w", 0, 0, IF_LA, 0x00178000) -INST(sra_w, "sra.w", 0, 0, IF_LA, 0x00180000) -INST(rotr_w, "rotr_w", 0, 0, IF_LA, 0x001b0000) -INST(sll_d, "sll.d", 0, 0, IF_LA, 0x00188000) -INST(srl_d, "srl.d", 0, 0, IF_LA, 0x00190000) -INST(sra_d, "sra.d", 0, 0, IF_LA, 0x00198000) -INST(rotr_d, "rotr.d", 0, 0, IF_LA, 0x001b8000) - -INST(maskeqz, "maskeqz", 0, 0, IF_LA, 0x00130000) -INST(masknez, "masknez", 0, 0, IF_LA, 0x00138000) - -INST(slt, "slt", 0, 0, IF_LA, 0x00120000) -INST(sltu, "sltu", 0, 0, IF_LA, 0x00128000) - -INST(amswap_w, "amswap.w", 0, 0, 
IF_LA, 0x38600000) -INST(amswap_d, "amswap.d", 0, 0, IF_LA, 0x38608000) -INST(amswap_db_w, "amswap_db.w", 0, 0, IF_LA, 0x38690000) -INST(amswap_db_d, "amswap_db.d", 0, 0, IF_LA, 0x38698000) -INST(amadd_w, "amadd.w", 0, 0, IF_LA, 0x38610000) -INST(amadd_d, "amadd.d", 0, 0, IF_LA, 0x38618000) -INST(amadd_db_w, "amadd_db.w", 0, 0, IF_LA, 0x386a0000) -INST(amadd_db_d, "amadd_db.d", 0, 0, IF_LA, 0x386a8000) -INST(amand_w, "amand.w", 0, 0, IF_LA, 0x38620000) -INST(amand_d, "amand.d", 0, 0, IF_LA, 0x38628000) -INST(amand_db_w, "amand_db.w", 0, 0, IF_LA, 0x386b0000) -INST(amand_db_d, "amand_db.d", 0, 0, IF_LA, 0x386b8000) -INST(amor_w, "amor.w", 0, 0, IF_LA, 0x38630000) -INST(amor_d, "amor.d", 0, 0, IF_LA, 0x38638000) -INST(amor_db_w, "amor_db.w", 0, 0, IF_LA, 0x386c0000) -INST(amor_db_d, "amor_db.d", 0, 0, IF_LA, 0x386c8000) -INST(amxor_w, "amxor.w", 0, 0, IF_LA, 0x38640000) -INST(amxor_d, "amxor.d", 0, 0, IF_LA, 0x38648000) -INST(amxor_db_w, "amxor_db.w", 0, 0, IF_LA, 0x386d0000) -INST(amxor_db_d, "amxor_db.d", 0, 0, IF_LA, 0x386d8000) -INST(ammax_w, "ammax.w", 0, 0, IF_LA, 0x38650000) -INST(ammax_d, "ammax.d", 0, 0, IF_LA, 0x38658000) -INST(ammax_db_w, "ammax_db.w", 0, 0, IF_LA, 0x386e0000) -INST(ammax_db_d, "ammax_db.d", 0, 0, IF_LA, 0x386e8000) -INST(ammin_w, "ammin.w", 0, 0, IF_LA, 0x38660000) -INST(ammin_d, "ammin.d", 0, 0, IF_LA, 0x38668000) -INST(ammin_db_w, "ammin_db.w", 0, 0, IF_LA, 0x386f0000) -INST(ammin_db_d, "ammin_db.d", 0, 0, IF_LA, 0x386f8000) -INST(ammax_wu, "ammax.wu", 0, 0, IF_LA, 0x38670000) -INST(ammax_du, "ammax.du", 0, 0, IF_LA, 0x38678000) -INST(ammax_db_wu, "ammax_db.wu", 0, 0, IF_LA, 0x38700000) -INST(ammax_db_du, "ammax_db.du", 0, 0, IF_LA, 0x38708000) -INST(ammin_wu, "ammin.wu", 0, 0, IF_LA, 0x38680000) -INST(ammin_du, "ammin.du", 0, 0, IF_LA, 0x38688000) -INST(ammin_db_wu, "ammin_db.wu", 0, 0, IF_LA, 0x38710000) -INST(ammin_db_du, "ammin_db.du", 0, 0, IF_LA, 0x38718000) - -INST(crc_w_b_w, "crc.w.b.w", 0, 0, IF_LA, 0x00240000) -INST(crc_w_h_w, 
"crc.w.h.w", 0, 0, IF_LA, 0x00248000) -INST(crc_w_w_w, "crc.w.w.w", 0, 0, IF_LA, 0x00250000) -INST(crc_w_d_w, "crc.w.d.w", 0, 0, IF_LA, 0x00258000) -INST(crcc_w_b_w, "crcc.w.b.w", 0, 0, IF_LA, 0x00260000) -INST(crcc_w_h_w, "crcc.w.h.w", 0, 0, IF_LA, 0x00268000) -INST(crcc_w_w_w, "crcc.w.w.w", 0, 0, IF_LA, 0x00270000) -INST(crcc_w_d_w, "crcc.w.d.w", 0, 0, IF_LA, 0x00278000) +INST(add_w, "add.w", 0, 0x00100000) +INST(add_d, "add.d", 0, 0x00108000) +INST(sub_w, "sub.w", 0, 0x00110000) +INST(sub_d, "sub.d", 0, 0x00118000) + +INST(and, "and", 0, 0x00148000) +INST(or, "or", 0, 0x00150000) +INST(nor, "nor", 0, 0x00140000) +INST(xor, "xor", 0, 0x00158000) +INST(andn, "andn", 0, 0x00168000) +INST(orn, "orn", 0, 0x00160000) + +INST(mul_w, "mul.w", 0, 0x001c0000) +INST(mul_d, "mul.d", 0, 0x001d8000) +INST(mulh_w, "mulh.w", 0, 0x001c8000) +INST(mulh_wu, "mulh.wu", 0, 0x001d0000) +INST(mulh_d, "mulh.d", 0, 0x001e0000) +INST(mulh_du, "mulh.du", 0, 0x001e8000) +INST(mulw_d_w, "mulw.d.w", 0, 0x001f0000) +INST(mulw_d_wu, "mulw.d.wu", 0, 0x001f8000) +INST(div_w, "div.w", 0, 0x00200000) +INST(div_wu, "div.wu", 0, 0x00210000) +INST(div_d, "div.d", 0, 0x00220000) +INST(div_du, "div.du", 0, 0x00230000) +INST(mod_w, "mod.w", 0, 0x00208000) +INST(mod_wu, "mod.wu", 0, 0x00218000) +INST(mod_d, "mod.d", 0, 0x00228000) +INST(mod_du, "mod.du", 0, 0x00238000) + +INST(sll_w, "sll.w", 0, 0x00170000) +INST(srl_w, "srl.w", 0, 0x00178000) +INST(sra_w, "sra.w", 0, 0x00180000) +INST(rotr_w, "rotr_w", 0, 0x001b0000) +INST(sll_d, "sll.d", 0, 0x00188000) +INST(srl_d, "srl.d", 0, 0x00190000) +INST(sra_d, "sra.d", 0, 0x00198000) +INST(rotr_d, "rotr.d", 0, 0x001b8000) + +INST(maskeqz, "maskeqz", 0, 0x00130000) +INST(masknez, "masknez", 0, 0x00138000) + +INST(slt, "slt", 0, 0x00120000) +INST(sltu, "sltu", 0, 0x00128000) + +INST(amswap_w, "amswap.w", 0, 0x38600000) +INST(amswap_d, "amswap.d", 0, 0x38608000) +INST(amswap_db_w, "amswap_db.w", 0, 0x38690000) +INST(amswap_db_d, "amswap_db.d", 0, 0x38698000) 
+INST(amadd_w, "amadd.w", 0, 0x38610000) +INST(amadd_d, "amadd.d", 0, 0x38618000) +INST(amadd_db_w, "amadd_db.w", 0, 0x386a0000) +INST(amadd_db_d, "amadd_db.d", 0, 0x386a8000) +INST(amand_w, "amand.w", 0, 0x38620000) +INST(amand_d, "amand.d", 0, 0x38628000) +INST(amand_db_w, "amand_db.w", 0, 0x386b0000) +INST(amand_db_d, "amand_db.d", 0, 0x386b8000) +INST(amor_w, "amor.w", 0, 0x38630000) +INST(amor_d, "amor.d", 0, 0x38638000) +INST(amor_db_w, "amor_db.w", 0, 0x386c0000) +INST(amor_db_d, "amor_db.d", 0, 0x386c8000) +INST(amxor_w, "amxor.w", 0, 0x38640000) +INST(amxor_d, "amxor.d", 0, 0x38648000) +INST(amxor_db_w, "amxor_db.w", 0, 0x386d0000) +INST(amxor_db_d, "amxor_db.d", 0, 0x386d8000) +INST(ammax_w, "ammax.w", 0, 0x38650000) +INST(ammax_d, "ammax.d", 0, 0x38658000) +INST(ammax_db_w, "ammax_db.w", 0, 0x386e0000) +INST(ammax_db_d, "ammax_db.d", 0, 0x386e8000) +INST(ammin_w, "ammin.w", 0, 0x38660000) +INST(ammin_d, "ammin.d", 0, 0x38668000) +INST(ammin_db_w, "ammin_db.w", 0, 0x386f0000) +INST(ammin_db_d, "ammin_db.d", 0, 0x386f8000) +INST(ammax_wu, "ammax.wu", 0, 0x38670000) +INST(ammax_du, "ammax.du", 0, 0x38678000) +INST(ammax_db_wu, "ammax_db.wu", 0, 0x38700000) +INST(ammax_db_du, "ammax_db.du", 0, 0x38708000) +INST(ammin_wu, "ammin.wu", 0, 0x38680000) +INST(ammin_du, "ammin.du", 0, 0x38688000) +INST(ammin_db_wu, "ammin_db.wu", 0, 0x38710000) +INST(ammin_db_du, "ammin_db.du", 0, 0x38718000) + +INST(crc_w_b_w, "crc.w.b.w", 0, 0x00240000) +INST(crc_w_h_w, "crc.w.h.w", 0, 0x00248000) +INST(crc_w_w_w, "crc.w.w.w", 0, 0x00250000) +INST(crc_w_d_w, "crc.w.d.w", 0, 0x00258000) +INST(crcc_w_b_w, "crcc.w.b.w", 0, 0x00260000) +INST(crcc_w_h_w, "crcc.w.h.w", 0, 0x00268000) +INST(crcc_w_w_w, "crcc.w.w.w", 0, 0x00270000) +INST(crcc_w_d_w, "crcc.w.d.w", 0, 0x00278000) ////R_R_R_I. 
-INST(alsl_w, "alsl.w", 0, 0, IF_LA, 0x00040000) -INST(alsl_wu, "alsl.wu", 0, 0, IF_LA, 0x00060000) -INST(alsl_d, "alsl.d", 0, 0, IF_LA, 0x002c0000) +INST(alsl_w, "alsl.w", 0, 0x00040000) +INST(alsl_wu, "alsl.wu", 0, 0x00060000) +INST(alsl_d, "alsl.d", 0, 0x002c0000) -INST(bytepick_w, "bytepick.w", 0, 0, IF_LA, 0x00080000) -INST(bytepick_d, "bytepick.d", 0, 0, IF_LA, 0x000c0000) +INST(bytepick_w, "bytepick.w", 0, 0x00080000) +INST(bytepick_d, "bytepick.d", 0, 0x000c0000) -INST(fsel, "fsel", 0, 0, IF_LA, 0x0d000000) +INST(fsel, "fsel", 0, 0x0d000000) ////R_I. -INST(lu12i_w, "lu12i.w", 0, 0, IF_LA, 0x14000000) -INST(lu32i_d, "lu32i.d", 0, 0, IF_LA, 0x16000000) +INST(lu12i_w, "lu12i.w", 0, 0x14000000) +INST(lu32i_d, "lu32i.d", 0, 0x16000000) -INST(pcaddi, "pcaddi", 0, 0, IF_LA, 0x18000000) -INST(pcaddu12i, "pcaddu12i", 0, 0, IF_LA, 0x1c000000) -INST(pcalau12i, "pcalau12i", 0, 0, IF_LA, 0x1a000000) -INST(pcaddu18i, "pcaddu18i", 0, 0, IF_LA, 0x1e000000) +INST(pcaddi, "pcaddi", 0, 0x18000000) +INST(pcaddu12i, "pcaddu12i", 0, 0x1c000000) +INST(pcalau12i, "pcalau12i", 0, 0x1a000000) +INST(pcaddu18i, "pcaddu18i", 0, 0x1e000000) ////R_R. 
-INST(ext_w_b, "ext.w.b", 0, 0, IF_LA, 0x00005c00) -INST(ext_w_h, "ext.w.h", 0, 0, IF_LA, 0x00005800) -INST(clo_w, "clo.w", 0, 0, IF_LA, 0x00001000) -INST(clz_w, "clz.w", 0, 0, IF_LA, 0x00001400) -INST(cto_w, "cto.w", 0, 0, IF_LA, 0x00001800) -INST(ctz_w, "ctz.w", 0, 0, IF_LA, 0x00001c00) -INST(clo_d, "clo.d", 0, 0, IF_LA, 0x00002000) -INST(clz_d, "clz.d", 0, 0, IF_LA, 0x00002400) -INST(cto_d, "cto.d", 0, 0, IF_LA, 0x00002800) -INST(ctz_d, "ctz.d", 0, 0, IF_LA, 0x00002c00) -INST(revb_2h, "revb.2h", 0, 0, IF_LA, 0x00003000) -INST(revb_4h, "revb.4h", 0, 0, IF_LA, 0x00003400) -INST(revb_2w, "revb.2w", 0, 0, IF_LA, 0x00003800) -INST(revb_d, "revb.d", 0, 0, IF_LA, 0x00003c00) -INST(revh_2w, "revh.2w", 0, 0, IF_LA, 0x00004000) -INST(revh_d, "revh.d", 0, 0, IF_LA, 0x00004400) -INST(bitrev_4b, "bitrev.4b", 0, 0, IF_LA, 0x00004800) -INST(bitrev_8b, "bitrev.8b", 0, 0, IF_LA, 0x00004c00) -INST(bitrev_w, "bitrev.w", 0, 0, IF_LA, 0x00005000) -INST(bitrev_d, "bitrev.d", 0, 0, IF_LA, 0x00005400) -INST(rdtimel_w, "rdtimel.w", 0, 0, IF_LA, 0x00006000) -INST(rdtimeh_w, "rdtimeh.w", 0, 0, IF_LA, 0x00006400) -INST(rdtime_d, "rdtime.d", 0, 0, IF_LA, 0x00006800) -INST(cpucfg, "cpucfg", 0, 0, IF_LA, 0x00006c00) +INST(ext_w_b, "ext.w.b", 0, 0x00005c00) +INST(ext_w_h, "ext.w.h", 0, 0x00005800) +INST(clo_w, "clo.w", 0, 0x00001000) +INST(clz_w, "clz.w", 0, 0x00001400) +INST(cto_w, "cto.w", 0, 0x00001800) +INST(ctz_w, "ctz.w", 0, 0x00001c00) +INST(clo_d, "clo.d", 0, 0x00002000) +INST(clz_d, "clz.d", 0, 0x00002400) +INST(cto_d, "cto.d", 0, 0x00002800) +INST(ctz_d, "ctz.d", 0, 0x00002c00) +INST(revb_2h, "revb.2h", 0, 0x00003000) +INST(revb_4h, "revb.4h", 0, 0x00003400) +INST(revb_2w, "revb.2w", 0, 0x00003800) +INST(revb_d, "revb.d", 0, 0x00003c00) +INST(revh_2w, "revh.2w", 0, 0x00004000) +INST(revh_d, "revh.d", 0, 0x00004400) +INST(bitrev_4b, "bitrev.4b", 0, 0x00004800) +INST(bitrev_8b, "bitrev.8b", 0, 0x00004c00) +INST(bitrev_w, "bitrev.w", 0, 0x00005000) +INST(bitrev_d, "bitrev.d", 0, 
0x00005400) +INST(rdtimel_w, "rdtimel.w", 0, 0x00006000) +INST(rdtimeh_w, "rdtimeh.w", 0, 0x00006400) +INST(rdtime_d, "rdtime.d", 0, 0x00006800) +INST(cpucfg, "cpucfg", 0, 0x00006c00) ////R_R_I_I. -INST(bstrins_w, "bstrins.w", 0, 0, IF_LA, 0x00600000) -INST(bstrins_d, "bstrins.d", 0, 0, IF_LA, 0x00800000) -INST(bstrpick_w, "bstrpick.w", 0, 0, IF_LA, 0x00608000) -INST(bstrpick_d, "bstrpick.d", 0, 0, IF_LA, 0x00c00000) +INST(bstrins_w, "bstrins.w", 0, 0x00600000) +INST(bstrins_d, "bstrins.d", 0, 0x00800000) +INST(bstrpick_w, "bstrpick.w", 0, 0x00608000) +INST(bstrpick_d, "bstrpick.d", 0, 0x00c00000) ////Load. -INST(ld_b, "ld.b", 0, LD, IF_LA, 0x28000000) -INST(ld_h, "ld.h", 0, LD, IF_LA, 0x28400000) -INST(ld_w, "ld.w", 0, LD, IF_LA, 0x28800000) -INST(ld_d, "ld.d", 0, LD, IF_LA, 0x28c00000) -INST(ld_bu, "ld.bu", 0, LD, IF_LA, 0x2a000000) -INST(ld_hu, "ld.hu", 0, LD, IF_LA, 0x2a400000) -INST(ld_wu, "ld.wu", 0, LD, IF_LA, 0x2a800000) - -INST(ldptr_w, "ldptr.w", 0, LD, IF_LA, 0x24000000) -INST(ldptr_d, "ldptr.d", 0, LD, IF_LA, 0x26000000) -INST(ll_w, "ll.w", 0, 0, IF_LA, 0x20000000) -INST(ll_d, "ll.d", 0, 0, IF_LA, 0x22000000) - -INST(ldx_b, "ldx.b", 0, LD, IF_LA, 0x38000000) -INST(ldx_h, "ldx.h", 0, LD, IF_LA, 0x38040000) -INST(ldx_w, "ldx.w", 0, LD, IF_LA, 0x38080000) -INST(ldx_d, "ldx.d", 0, LD, IF_LA, 0x380c0000) -INST(ldx_bu, "ldx.bu", 0, LD, IF_LA, 0x38200000) -INST(ldx_hu, "ldx.hu", 0, LD, IF_LA, 0x38240000) -INST(ldx_wu, "ldx.wu", 0, LD, IF_LA, 0x38280000) - -INST(ldgt_b, "ldgt.b", 0, 0, IF_LA, 0x38780000) -INST(ldgt_h, "ldgt.h", 0, 0, IF_LA, 0x38788000) -INST(ldgt_w, "ldgt.w", 0, 0, IF_LA, 0x38790000) -INST(ldgt_d, "ldgt.d", 0, 0, IF_LA, 0x38798000) -INST(ldle_b, "ldle.b", 0, 0, IF_LA, 0x387a0000) -INST(ldle_h, "ldle.h", 0, 0, IF_LA, 0x387a8000) -INST(ldle_w, "ldle.w", 0, 0, IF_LA, 0x387b0000) -INST(ldle_d, "ldle.d", 0, 0, IF_LA, 0x387b8000) +INST(ld_b, "ld.b", LD, 0x28000000) +INST(ld_h, "ld.h", LD, 0x28400000) +INST(ld_w, "ld.w", LD, 0x28800000) +INST(ld_d, 
"ld.d", LD, 0x28c00000) +INST(ld_bu, "ld.bu", LD, 0x2a000000) +INST(ld_hu, "ld.hu", LD, 0x2a400000) +INST(ld_wu, "ld.wu", LD, 0x2a800000) + +INST(ldptr_w, "ldptr.w", LD, 0x24000000) +INST(ldptr_d, "ldptr.d", LD, 0x26000000) +INST(ll_w, "ll.w", 0, 0x20000000) +INST(ll_d, "ll.d", 0, 0x22000000) + +INST(ldx_b, "ldx.b", LD, 0x38000000) +INST(ldx_h, "ldx.h", LD, 0x38040000) +INST(ldx_w, "ldx.w", LD, 0x38080000) +INST(ldx_d, "ldx.d", LD, 0x380c0000) +INST(ldx_bu, "ldx.bu", LD, 0x38200000) +INST(ldx_hu, "ldx.hu", LD, 0x38240000) +INST(ldx_wu, "ldx.wu", LD, 0x38280000) + +INST(ldgt_b, "ldgt.b", 0, 0x38780000) +INST(ldgt_h, "ldgt.h", 0, 0x38788000) +INST(ldgt_w, "ldgt.w", 0, 0x38790000) +INST(ldgt_d, "ldgt.d", 0, 0x38798000) +INST(ldle_b, "ldle.b", 0, 0x387a0000) +INST(ldle_h, "ldle.h", 0, 0x387a8000) +INST(ldle_w, "ldle.w", 0, 0x387b0000) +INST(ldle_d, "ldle.d", 0, 0x387b8000) ////R_R_I. -INST(addi_w, "addi.w", 0, 0, IF_LA, 0x02800000) -INST(addi_d, "addi.d", 0, 0, IF_LA, 0x02c00000) -INST(lu52i_d, "lu52i.d", 0, 0, IF_LA, 0x03000000) -INST(slti, "slti", 0, 0, IF_LA, 0x02000000) - -INST(sltui, "sltui", 0, 0, IF_LA, 0x02400000) -INST(andi, "andi", 0, 0, IF_LA, 0x03400000) -INST(ori, "ori", 0, 0, IF_LA, 0x03800000) -INST(xori, "xori", 0, 0, IF_LA, 0x03c00000) - -INST(slli_w, "slli.w", 0, 0, IF_LA, 0x00408000) -INST(srli_w, "srli.w", 0, 0, IF_LA, 0x00448000) -INST(srai_w, "srai.w", 0, 0, IF_LA, 0x00488000) -INST(rotri_w, "rotri.w", 0, 0, IF_LA, 0x004c8000) -INST(slli_d, "slli.d", 0, 0, IF_LA, 0x00410000) -INST(srli_d, "srli.d", 0, 0, IF_LA, 0x00450000) -INST(srai_d, "srai.d", 0, 0, IF_LA, 0x00490000) -INST(rotri_d, "rotri.d", 0, 0, IF_LA, 0x004d0000) - -INST(addu16i_d, "addu16i.d", 0, 0, IF_LA, 0x10000000) - -INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) +INST(addi_w, "addi.w", 0, 0x02800000) +INST(addi_d, "addi.d", 0, 0x02c00000) +INST(lu52i_d, "lu52i.d", 0, 0x03000000) +INST(slti, "slti", 0, 0x02000000) + +INST(sltui, "sltui", 0, 0x02400000) +INST(andi, "andi", 0, 0x03400000) 
+INST(ori, "ori", 0, 0x03800000) +INST(xori, "xori", 0, 0x03c00000) + +INST(slli_w, "slli.w", 0, 0x00408000) +INST(srli_w, "srli.w", 0, 0x00448000) +INST(srai_w, "srai.w", 0, 0x00488000) +INST(rotri_w, "rotri.w", 0, 0x004c8000) +INST(slli_d, "slli.d", 0, 0x00410000) +INST(srli_d, "srli.d", 0, 0x00450000) +INST(srai_d, "srai.d", 0, 0x00490000) +INST(rotri_d, "rotri.d", 0, 0x004d0000) + +INST(addu16i_d, "addu16i.d", 0, 0x10000000) + +INST(jirl, "jirl", 0, 0x4c000000) //////////////////////////////////////////////////////////////////////////////////////////// ////NOTE: jirl must be the last one !!! more info to see emitter::emitInsMayWriteToGCReg(). // @@ -280,209 +278,209 @@ INST(jirl, "jirl", 0, 0, IF_LA, 0x4c000000) //////////////////////////////////////////////////////////////////////////////////////////// ////Store. -INST(st_b, "st.b", 0, ST, IF_LA, 0x29000000) -INST(st_h, "st.h", 0, ST, IF_LA, 0x29400000) -INST(st_w, "st.w", 0, ST, IF_LA, 0x29800000) -INST(st_d, "st.d", 0, ST, IF_LA, 0x29c00000) - -INST(stptr_w, "stptr.w", 0, ST, IF_LA, 0x25000000) -INST(stptr_d, "stptr.d", 0, ST, IF_LA, 0x27000000) -INST(sc_w, "sc.w", 0, 0, IF_LA, 0x21000000) -INST(sc_d, "sc.d", 0, 0, IF_LA, 0x23000000) - -INST(stx_b, "stx.b", 0, ST, IF_LA, 0x38100000) -INST(stx_h, "stx.h", 0, ST, IF_LA, 0x38140000) -INST(stx_w, "stx.w", 0, ST, IF_LA, 0x38180000) -INST(stx_d, "stx.d", 0, ST, IF_LA, 0x381c0000) -INST(stgt_b, "stgt.b", 0, 0, IF_LA, 0x387c0000) -INST(stgt_h, "stgt.h", 0, 0, IF_LA, 0x387c8000) -INST(stgt_w, "stgt.w", 0, 0, IF_LA, 0x387d0000) -INST(stgt_d, "stgt.d", 0, 0, IF_LA, 0x387d8000) -INST(stle_b, "stle.b", 0, 0, IF_LA, 0x387e0000) -INST(stle_h, "stle.h", 0, 0, IF_LA, 0x387e8000) -INST(stle_w, "stle.w", 0, 0, IF_LA, 0x387f0000) -INST(stle_d, "stle.d", 0, 0, IF_LA, 0x387f8000) - -INST(dbar, "dbar", 0, 0, IF_LA, 0x38720000) -INST(ibar, "ibar", 0, 0, IF_LA, 0x38728000) - -INST(syscall, "syscall", 0, 0, IF_LA, 0x002b0000) -INST(break, "break", 0, 0, IF_LA, 0x002a0005) - 
-INST(asrtle_d, "asrtle.d", 0, 0, IF_LA, 0x00010000) -INST(asrtgt_d, "asrtgt.d", 0, 0, IF_LA, 0x00018000) - -INST(preld, "preld", 0, LD, IF_LA, 0x2ac00000) -INST(preldx, "preldx", 0, LD, IF_LA, 0x382c0000) +INST(st_b, "st.b", ST, 0x29000000) +INST(st_h, "st.h", ST, 0x29400000) +INST(st_w, "st.w", ST, 0x29800000) +INST(st_d, "st.d", ST, 0x29c00000) + +INST(stptr_w, "stptr.w", ST, 0x25000000) +INST(stptr_d, "stptr.d", ST, 0x27000000) +INST(sc_w, "sc.w", 0, 0x21000000) +INST(sc_d, "sc.d", 0, 0x23000000) + +INST(stx_b, "stx.b", ST, 0x38100000) +INST(stx_h, "stx.h", ST, 0x38140000) +INST(stx_w, "stx.w", ST, 0x38180000) +INST(stx_d, "stx.d", ST, 0x381c0000) +INST(stgt_b, "stgt.b", 0, 0x387c0000) +INST(stgt_h, "stgt.h", 0, 0x387c8000) +INST(stgt_w, "stgt.w", 0, 0x387d0000) +INST(stgt_d, "stgt.d", 0, 0x387d8000) +INST(stle_b, "stle.b", 0, 0x387e0000) +INST(stle_h, "stle.h", 0, 0x387e8000) +INST(stle_w, "stle.w", 0, 0x387f0000) +INST(stle_d, "stle.d", 0, 0x387f8000) + +INST(dbar, "dbar", 0, 0x38720000) +INST(ibar, "ibar", 0, 0x38728000) + +INST(syscall, "syscall", 0, 0x002b0000) +INST(break, "break", 0, 0x002a0005) + +INST(asrtle_d, "asrtle.d", 0, 0x00010000) +INST(asrtgt_d, "asrtgt.d", 0, 0x00018000) + +INST(preld, "preld", LD, 0x2ac00000) +INST(preldx, "preldx", LD, 0x382c0000) ////Float instructions. ////R_R_R. 
-INST(fadd_s, "fadd.s", 0, 0, IF_LA, 0x01008000) -INST(fadd_d, "fadd.d", 0, 0, IF_LA, 0x01010000) -INST(fsub_s, "fsub.s", 0, 0, IF_LA, 0x01028000) -INST(fsub_d, "fsub.d", 0, 0, IF_LA, 0x01030000) -INST(fmul_s, "fmul.s", 0, 0, IF_LA, 0x01048000) -INST(fmul_d, "fmul.d", 0, 0, IF_LA, 0x01050000) -INST(fdiv_s, "fdiv.s", 0, 0, IF_LA, 0x01068000) -INST(fdiv_d, "fdiv.d", 0, 0, IF_LA, 0x01070000) - -INST(fmax_s, "fmax.s", 0, 0, IF_LA, 0x01088000) -INST(fmax_d, "fmax.d", 0, 0, IF_LA, 0x01090000) -INST(fmin_s, "fmin.s", 0, 0, IF_LA, 0x010a8000) -INST(fmin_d, "fmin.d", 0, 0, IF_LA, 0x010b0000) -INST(fmaxa_s, "fmaxa.s", 0, 0, IF_LA, 0x010c8000) -INST(fmaxa_d, "fmaxa.d", 0, 0, IF_LA, 0x010d0000) -INST(fmina_s, "fmina.s", 0, 0, IF_LA, 0x010e8000) -INST(fmina_d, "fmina.d", 0, 0, IF_LA, 0x010f0000) - -INST(fscaleb_s, "fscaleb.s", 0, 0, IF_LA, 0x01108000) -INST(fscaleb_d, "fscaleb.d", 0, 0, IF_LA, 0x01110000) - -INST(fcopysign_s, "fcopysign.s", 0, 0, IF_LA, 0x01128000) -INST(fcopysign_d, "fcopysign.d", 0, 0, IF_LA, 0x01130000) - -INST(fldx_s, "fldx.s", 0, LD, IF_LA, 0x38300000) -INST(fldx_d, "fldx.d", 0, LD, IF_LA, 0x38340000) -INST(fstx_s, "fstx.s", 0, ST, IF_LA, 0x38380000) -INST(fstx_d, "fstx.d", 0, ST, IF_LA, 0x383c0000) - -INST(fldgt_s, "fldgt.s", 0, 0, IF_LA, 0x38740000) -INST(fldgt_d, "fldgt.d", 0, 0, IF_LA, 0x38748000) -INST(fldle_s, "fldle.s", 0, 0, IF_LA, 0x38750000) -INST(fldle_d, "fldle.d", 0, 0, IF_LA, 0x38758000) -INST(fstgt_s, "fstgt.s", 0, 0, IF_LA, 0x38760000) -INST(fstgt_d, "fstgt.d", 0, 0, IF_LA, 0x38768000) -INST(fstle_s, "fstle.s", 0, 0, IF_LA, 0x38770000) -INST(fstle_d, "fstle.d", 0, 0, IF_LA, 0x38778000) +INST(fadd_s, "fadd.s", 0, 0x01008000) +INST(fadd_d, "fadd.d", 0, 0x01010000) +INST(fsub_s, "fsub.s", 0, 0x01028000) +INST(fsub_d, "fsub.d", 0, 0x01030000) +INST(fmul_s, "fmul.s", 0, 0x01048000) +INST(fmul_d, "fmul.d", 0, 0x01050000) +INST(fdiv_s, "fdiv.s", 0, 0x01068000) +INST(fdiv_d, "fdiv.d", 0, 0x01070000) + +INST(fmax_s, "fmax.s", 0, 0x01088000) 
+INST(fmax_d, "fmax.d", 0, 0x01090000) +INST(fmin_s, "fmin.s", 0, 0x010a8000) +INST(fmin_d, "fmin.d", 0, 0x010b0000) +INST(fmaxa_s, "fmaxa.s", 0, 0x010c8000) +INST(fmaxa_d, "fmaxa.d", 0, 0x010d0000) +INST(fmina_s, "fmina.s", 0, 0x010e8000) +INST(fmina_d, "fmina.d", 0, 0x010f0000) + +INST(fscaleb_s, "fscaleb.s", 0, 0x01108000) +INST(fscaleb_d, "fscaleb.d", 0, 0x01110000) + +INST(fcopysign_s, "fcopysign.s", 0, 0x01128000) +INST(fcopysign_d, "fcopysign.d", 0, 0x01130000) + +INST(fldx_s, "fldx.s", LD, 0x38300000) +INST(fldx_d, "fldx.d", LD, 0x38340000) +INST(fstx_s, "fstx.s", ST, 0x38380000) +INST(fstx_d, "fstx.d", ST, 0x383c0000) + +INST(fldgt_s, "fldgt.s", 0, 0x38740000) +INST(fldgt_d, "fldgt.d", 0, 0x38748000) +INST(fldle_s, "fldle.s", 0, 0x38750000) +INST(fldle_d, "fldle.d", 0, 0x38758000) +INST(fstgt_s, "fstgt.s", 0, 0x38760000) +INST(fstgt_d, "fstgt.d", 0, 0x38768000) +INST(fstle_s, "fstle.s", 0, 0x38770000) +INST(fstle_d, "fstle.d", 0, 0x38778000) ////R_R_R_R. -INST(fmadd_s, "fmadd.s", 0, 0, IF_LA, 0x08100000) -INST(fmadd_d, "fmadd.d", 0, 0, IF_LA, 0x08200000) -INST(fmsub_s, "fmsub.s", 0, 0, IF_LA, 0x08500000) -INST(fmsub_d, "fmsub.d", 0, 0, IF_LA, 0x08600000) -INST(fnmadd_s, "fnmadd.s", 0, 0, IF_LA, 0x08900000) -INST(fnmadd_d, "fnmadd.d", 0, 0, IF_LA, 0x08a00000) -INST(fnmsub_s, "fnmsub.s", 0, 0, IF_LA, 0x08d00000) -INST(fnmsub_d, "fnmsub.d", 0, 0, IF_LA, 0x08e00000) +INST(fmadd_s, "fmadd.s", 0, 0x08100000) +INST(fmadd_d, "fmadd.d", 0, 0x08200000) +INST(fmsub_s, "fmsub.s", 0, 0x08500000) +INST(fmsub_d, "fmsub.d", 0, 0x08600000) +INST(fnmadd_s, "fnmadd.s", 0, 0x08900000) +INST(fnmadd_d, "fnmadd.d", 0, 0x08a00000) +INST(fnmsub_s, "fnmsub.s", 0, 0x08d00000) +INST(fnmsub_d, "fnmsub.d", 0, 0x08e00000) ////R_R. 
-INST(fabs_s, "fabs.s", 0, 0, IF_LA, 0x01140400) -INST(fabs_d, "fabs.d", 0, 0, IF_LA, 0x01140800) -INST(fneg_s, "fneg.s", 0, 0, IF_LA, 0x01141400) -INST(fneg_d, "fneg.d", 0, 0, IF_LA, 0x01141800) - -INST(fsqrt_s, "fsqrt.s", 0, 0, IF_LA, 0x01144400) -INST(fsqrt_d, "fsqrt.d", 0, 0, IF_LA, 0x01144800) -INST(frsqrt_s, "frsqrt.s", 0, 0, IF_LA, 0x01146400) -INST(frsqrt_d, "frsqrt.d", 0, 0, IF_LA, 0x01146800) -INST(frecip_s, "frecip.s", 0, 0, IF_LA, 0x01145400) -INST(frecip_d, "frecip.d", 0, 0, IF_LA, 0x01145800) -INST(flogb_s, "flogb.s", 0, 0, IF_LA, 0x01142400) -INST(flogb_d, "flogb.d", 0, 0, IF_LA, 0x01142800) -INST(fclass_s, "fclass.s", 0, 0, IF_LA, 0x01143400) -INST(fclass_d, "fclass.d", 0, 0, IF_LA, 0x01143800) - -INST(fcvt_s_d, "fcvt.s.d", 0, 0, IF_LA, 0x01191800) -INST(fcvt_d_s, "fcvt.d.s", 0, 0, IF_LA, 0x01192400) -INST(ffint_s_w, "ffint.s.w", 0, 0, IF_LA, 0x011d1000) -INST(ffint_s_l, "ffint.s.l", 0, 0, IF_LA, 0x011d1800) -INST(ffint_d_w, "ffint.d.w", 0, 0, IF_LA, 0x011d2000) -INST(ffint_d_l, "ffint.d.l", 0, 0, IF_LA, 0x011d2800) -INST(ftint_w_s, "ftint.w.s", 0, 0, IF_LA, 0x011b0400) -INST(ftint_w_d, "ftint.w.d", 0, 0, IF_LA, 0x011b0800) -INST(ftint_l_s, "ftint.l.s", 0, 0, IF_LA, 0x011b2400) -INST(ftint_l_d, "ftint.l.d", 0, 0, IF_LA, 0x011b2800) -INST(ftintrm_w_s, "ftintrm.w.s", 0, 0, IF_LA, 0x011a0400) -INST(ftintrm_w_d, "ftintrm.w.d", 0, 0, IF_LA, 0x011a0800) -INST(ftintrm_l_s, "ftintrm.l.s", 0, 0, IF_LA, 0x011a2400) -INST(ftintrm_l_d, "ftintrm.l.d", 0, 0, IF_LA, 0x011a2800) -INST(ftintrp_w_s, "ftintrp.w.s", 0, 0, IF_LA, 0x011a4400) -INST(ftintrp_w_d, "ftintrp.w.d", 0, 0, IF_LA, 0x011a4800) -INST(ftintrp_l_s, "ftintrp.l.s", 0, 0, IF_LA, 0x011a6400) -INST(ftintrp_l_d, "ftintrp.l.d", 0, 0, IF_LA, 0x011a6800) -INST(ftintrz_w_s, "ftintrz.w.s", 0, 0, IF_LA, 0x011a8400) -INST(ftintrz_w_d, "ftintrz.w.d", 0, 0, IF_LA, 0x011a8800) -INST(ftintrz_l_s, "ftintrz.l.s", 0, 0, IF_LA, 0x011aa400) -INST(ftintrz_l_d, "ftintrz.l.d", 0, 0, IF_LA, 0x011aa800) -INST(ftintrne_w_s, 
"ftintrne.w.s", 0, 0, IF_LA, 0x011ac400) -INST(ftintrne_w_d, "ftintrne.w.d", 0, 0, IF_LA, 0x011ac800) -INST(ftintrne_l_s, "ftintrne.l.s", 0, 0, IF_LA, 0x011ae400) -INST(ftintrne_l_d, "ftintrne.l.d", 0, 0, IF_LA, 0x011ae800) -INST(frint_s, "frint.s", 0, 0, IF_LA, 0x011e4400) -INST(frint_d, "frint.d", 0, 0, IF_LA, 0x011e4800) - -INST(fmov_s, "fmov.s", 0, 0, IF_LA, 0x01149400) -INST(fmov_d, "fmov.d", 0, 0, IF_LA, 0x01149800) - -INST(movgr2fr_w, "movgr2fr.w", 0, 0, IF_LA, 0x0114a400) -INST(movgr2fr_d, "movgr2fr.d", 0, 0, IF_LA, 0x0114a800) -INST(movgr2frh_w, "movgr2frh.w", 0, 0, IF_LA, 0x0114ac00) -INST(movfr2gr_s, "movfr2gr.s", 0, 0, IF_LA, 0x0114b400) -INST(movfr2gr_d, "movfr2gr.d", 0, 0, IF_LA, 0x0114b800) -INST(movfrh2gr_s, "movfrh2gr.s", 0, 0, IF_LA, 0x0114bc00) +INST(fabs_s, "fabs.s", 0, 0x01140400) +INST(fabs_d, "fabs.d", 0, 0x01140800) +INST(fneg_s, "fneg.s", 0, 0x01141400) +INST(fneg_d, "fneg.d", 0, 0x01141800) + +INST(fsqrt_s, "fsqrt.s", 0, 0x01144400) +INST(fsqrt_d, "fsqrt.d", 0, 0x01144800) +INST(frsqrt_s, "frsqrt.s", 0, 0x01146400) +INST(frsqrt_d, "frsqrt.d", 0, 0x01146800) +INST(frecip_s, "frecip.s", 0, 0x01145400) +INST(frecip_d, "frecip.d", 0, 0x01145800) +INST(flogb_s, "flogb.s", 0, 0x01142400) +INST(flogb_d, "flogb.d", 0, 0x01142800) +INST(fclass_s, "fclass.s", 0, 0x01143400) +INST(fclass_d, "fclass.d", 0, 0x01143800) + +INST(fcvt_s_d, "fcvt.s.d", 0, 0x01191800) +INST(fcvt_d_s, "fcvt.d.s", 0, 0x01192400) +INST(ffint_s_w, "ffint.s.w", 0, 0x011d1000) +INST(ffint_s_l, "ffint.s.l", 0, 0x011d1800) +INST(ffint_d_w, "ffint.d.w", 0, 0x011d2000) +INST(ffint_d_l, "ffint.d.l", 0, 0x011d2800) +INST(ftint_w_s, "ftint.w.s", 0, 0x011b0400) +INST(ftint_w_d, "ftint.w.d", 0, 0x011b0800) +INST(ftint_l_s, "ftint.l.s", 0, 0x011b2400) +INST(ftint_l_d, "ftint.l.d", 0, 0x011b2800) +INST(ftintrm_w_s, "ftintrm.w.s", 0, 0x011a0400) +INST(ftintrm_w_d, "ftintrm.w.d", 0, 0x011a0800) +INST(ftintrm_l_s, "ftintrm.l.s", 0, 0x011a2400) +INST(ftintrm_l_d, "ftintrm.l.d", 0, 0x011a2800) 
+INST(ftintrp_w_s, "ftintrp.w.s", 0, 0x011a4400) +INST(ftintrp_w_d, "ftintrp.w.d", 0, 0x011a4800) +INST(ftintrp_l_s, "ftintrp.l.s", 0, 0x011a6400) +INST(ftintrp_l_d, "ftintrp.l.d", 0, 0x011a6800) +INST(ftintrz_w_s, "ftintrz.w.s", 0, 0x011a8400) +INST(ftintrz_w_d, "ftintrz.w.d", 0, 0x011a8800) +INST(ftintrz_l_s, "ftintrz.l.s", 0, 0x011aa400) +INST(ftintrz_l_d, "ftintrz.l.d", 0, 0x011aa800) +INST(ftintrne_w_s, "ftintrne.w.s", 0, 0x011ac400) +INST(ftintrne_w_d, "ftintrne.w.d", 0, 0x011ac800) +INST(ftintrne_l_s, "ftintrne.l.s", 0, 0x011ae400) +INST(ftintrne_l_d, "ftintrne.l.d", 0, 0x011ae800) +INST(frint_s, "frint.s", 0, 0x011e4400) +INST(frint_d, "frint.d", 0, 0x011e4800) + +INST(fmov_s, "fmov.s", 0, 0x01149400) +INST(fmov_d, "fmov.d", 0, 0x01149800) + +INST(movgr2fr_w, "movgr2fr.w", 0, 0x0114a400) +INST(movgr2fr_d, "movgr2fr.d", 0, 0x0114a800) +INST(movgr2frh_w, "movgr2frh.w", 0, 0x0114ac00) +INST(movfr2gr_s, "movfr2gr.s", 0, 0x0114b400) +INST(movfr2gr_d, "movfr2gr.d", 0, 0x0114b800) +INST(movfrh2gr_s, "movfrh2gr.s", 0, 0x0114bc00) //// -INST(movgr2fcsr, "movgr2fcsr", 0, 0, IF_LA, 0x0114c000) -INST(movfcsr2gr, "movfcsr2gr", 0, 0, IF_LA, 0x0114c800) -INST(movfr2cf, "movfr2cf", 0, 0, IF_LA, 0x0114d000) -INST(movcf2fr, "movcf2fr", 0, 0, IF_LA, 0x0114d400) -INST(movgr2cf, "movgr2cf", 0, 0, IF_LA, 0x0114d800) -INST(movcf2gr, "movcf2gr", 0, 0, IF_LA, 0x0114dc00) +INST(movgr2fcsr, "movgr2fcsr", 0, 0x0114c000) +INST(movfcsr2gr, "movfcsr2gr", 0, 0x0114c800) +INST(movfr2cf, "movfr2cf", 0, 0x0114d000) +INST(movcf2fr, "movcf2fr", 0, 0x0114d400) +INST(movgr2cf, "movgr2cf", 0, 0x0114d800) +INST(movcf2gr, "movcf2gr", 0, 0x0114dc00) ////R_R_I. 
-INST(fcmp_caf_s, "fcmp.caf.s", 0, 0, IF_LA, 0x0c100000) -INST(fcmp_cun_s, "fcmp.cun.s", 0, 0, IF_LA, 0x0c140000) -INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0, IF_LA, 0x0c120000) -INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0, IF_LA, 0x0c160000) -INST(fcmp_clt_s, "fcmp.clt.s", 0, 0, IF_LA, 0x0c110000) -INST(fcmp_cult_s, "fcmp.cult.s", 0, 0, IF_LA, 0x0c150000) -INST(fcmp_cle_s, "fcmp.cle.s", 0, 0, IF_LA, 0x0c130000) -INST(fcmp_cule_s, "fcmp.cule.s", 0, 0, IF_LA, 0x0c170000) -INST(fcmp_cne_s, "fcmp.cne.s", 0, 0, IF_LA, 0x0c180000) -INST(fcmp_cor_s, "fcmp.cor.s", 0, 0, IF_LA, 0x0c1a0000) -INST(fcmp_cune_s, "fcmp.cune.s", 0, 0, IF_LA, 0x0c1c0000) - -INST(fcmp_saf_d, "fcmp.saf.d", 0, 0, IF_LA, 0x0c208000) -INST(fcmp_sun_d, "fcmp.sun.d", 0, 0, IF_LA, 0x0c248000) -INST(fcmp_seq_d, "fcmp.seq.d", 0, 0, IF_LA, 0x0c228000) -INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0, IF_LA, 0x0c268000) -INST(fcmp_slt_d, "fcmp.slt.d", 0, 0, IF_LA, 0x0c218000) -INST(fcmp_sult_d, "fcmp.sult.d", 0, 0, IF_LA, 0x0c258000) -INST(fcmp_sle_d, "fcmp.sle.d", 0, 0, IF_LA, 0x0c238000) -INST(fcmp_sule_d, "fcmp.sule.d", 0, 0, IF_LA, 0x0c278000) -INST(fcmp_sne_d, "fcmp.sne.d", 0, 0, IF_LA, 0x0c288000) -INST(fcmp_sor_d, "fcmp.sor.d", 0, 0, IF_LA, 0x0c2a8000) -INST(fcmp_sune_d, "fcmp.sune.d", 0, 0, IF_LA, 0x0c2c8000) - -INST(fcmp_caf_d, "fcmp.caf.d", 0, 0, IF_LA, 0x0c200000) -INST(fcmp_cun_d, "fcmp.cun.d", 0, 0, IF_LA, 0x0c240000) -INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0, IF_LA, 0x0c220000) -INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0, IF_LA, 0x0c260000) -INST(fcmp_clt_d, "fcmp.clt.d", 0, 0, IF_LA, 0x0c210000) -INST(fcmp_cult_d, "fcmp.cult.d", 0, 0, IF_LA, 0x0c250000) -INST(fcmp_cle_d, "fcmp.cle.d", 0, 0, IF_LA, 0x0c230000) -INST(fcmp_cule_d, "fcmp.cule.d", 0, 0, IF_LA, 0x0c270000) -INST(fcmp_cne_d, "fcmp.cne.d", 0, 0, IF_LA, 0x0c280000) -INST(fcmp_cor_d, "fcmp.cor.d", 0, 0, IF_LA, 0x0c2a0000) -INST(fcmp_cune_d, "fcmp.cune.d", 0, 0, IF_LA, 0x0c2c0000) - -INST(fcmp_saf_s, "fcmp.saf.s", 0, 0, IF_LA, 0x0c108000) -INST(fcmp_sun_s, "fcmp.sun.s", 
0, 0, IF_LA, 0x0c148000) -INST(fcmp_seq_s, "fcmp.seq.s", 0, 0, IF_LA, 0x0c128000) -INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0, IF_LA, 0x0c168000) -INST(fcmp_slt_s, "fcmp.slt.s", 0, 0, IF_LA, 0x0c118000) -INST(fcmp_sult_s, "fcmp.sult.s", 0, 0, IF_LA, 0x0c158000) -INST(fcmp_sle_s, "fcmp.sle.s", 0, 0, IF_LA, 0x0c138000) -INST(fcmp_sule_s, "fcmp.sule.s", 0, 0, IF_LA, 0x0c178000) -INST(fcmp_sne_s, "fcmp.sne.s", 0, 0, IF_LA, 0x0c188000) -INST(fcmp_sor_s, "fcmp.sor.s", 0, 0, IF_LA, 0x0c1a8000) -INST(fcmp_sune_s, "fcmp.sune.s", 0, 0, IF_LA, 0x0c1c8000) +INST(fcmp_caf_s, "fcmp.caf.s", 0, 0x0c100000) +INST(fcmp_cun_s, "fcmp.cun.s", 0, 0x0c140000) +INST(fcmp_ceq_s, "fcmp.ceq.s", 0, 0x0c120000) +INST(fcmp_cueq_s, "fcmp.cueq.s", 0, 0x0c160000) +INST(fcmp_clt_s, "fcmp.clt.s", 0, 0x0c110000) +INST(fcmp_cult_s, "fcmp.cult.s", 0, 0x0c150000) +INST(fcmp_cle_s, "fcmp.cle.s", 0, 0x0c130000) +INST(fcmp_cule_s, "fcmp.cule.s", 0, 0x0c170000) +INST(fcmp_cne_s, "fcmp.cne.s", 0, 0x0c180000) +INST(fcmp_cor_s, "fcmp.cor.s", 0, 0x0c1a0000) +INST(fcmp_cune_s, "fcmp.cune.s", 0, 0x0c1c0000) + +INST(fcmp_saf_d, "fcmp.saf.d", 0, 0x0c208000) +INST(fcmp_sun_d, "fcmp.sun.d", 0, 0x0c248000) +INST(fcmp_seq_d, "fcmp.seq.d", 0, 0x0c228000) +INST(fcmp_sueq_d, "fcmp.sueq.d", 0, 0x0c268000) +INST(fcmp_slt_d, "fcmp.slt.d", 0, 0x0c218000) +INST(fcmp_sult_d, "fcmp.sult.d", 0, 0x0c258000) +INST(fcmp_sle_d, "fcmp.sle.d", 0, 0x0c238000) +INST(fcmp_sule_d, "fcmp.sule.d", 0, 0x0c278000) +INST(fcmp_sne_d, "fcmp.sne.d", 0, 0x0c288000) +INST(fcmp_sor_d, "fcmp.sor.d", 0, 0x0c2a8000) +INST(fcmp_sune_d, "fcmp.sune.d", 0, 0x0c2c8000) + +INST(fcmp_caf_d, "fcmp.caf.d", 0, 0x0c200000) +INST(fcmp_cun_d, "fcmp.cun.d", 0, 0x0c240000) +INST(fcmp_ceq_d, "fcmp.ceq.d", 0, 0x0c220000) +INST(fcmp_cueq_d, "fcmp.cueq.d", 0, 0x0c260000) +INST(fcmp_clt_d, "fcmp.clt.d", 0, 0x0c210000) +INST(fcmp_cult_d, "fcmp.cult.d", 0, 0x0c250000) +INST(fcmp_cle_d, "fcmp.cle.d", 0, 0x0c230000) +INST(fcmp_cule_d, "fcmp.cule.d", 0, 0x0c270000) +INST(fcmp_cne_d, 
"fcmp.cne.d", 0, 0x0c280000) +INST(fcmp_cor_d, "fcmp.cor.d", 0, 0x0c2a0000) +INST(fcmp_cune_d, "fcmp.cune.d", 0, 0x0c2c0000) + +INST(fcmp_saf_s, "fcmp.saf.s", 0, 0x0c108000) +INST(fcmp_sun_s, "fcmp.sun.s", 0, 0x0c148000) +INST(fcmp_seq_s, "fcmp.seq.s", 0, 0x0c128000) +INST(fcmp_sueq_s, "fcmp.sueq.s", 0, 0x0c168000) +INST(fcmp_slt_s, "fcmp.slt.s", 0, 0x0c118000) +INST(fcmp_sult_s, "fcmp.sult.s", 0, 0x0c158000) +INST(fcmp_sle_s, "fcmp.sle.s", 0, 0x0c138000) +INST(fcmp_sule_s, "fcmp.sule.s", 0, 0x0c178000) +INST(fcmp_sne_s, "fcmp.sne.s", 0, 0x0c188000) +INST(fcmp_sor_s, "fcmp.sor.s", 0, 0x0c1a8000) +INST(fcmp_sune_s, "fcmp.sune.s", 0, 0x0c1c8000) ////R_R_I. -INST(fld_s, "fld.s", 0, LD, IF_LA, 0x2b000000) -INST(fld_d, "fld.d", 0, LD, IF_LA, 0x2b800000) -INST(fst_s, "fst.s", 0, ST, IF_LA, 0x2b400000) -INST(fst_d, "fst.d", 0, ST, IF_LA, 0x2bc00000) +INST(fld_s, "fld.s", LD, 0x2b000000) +INST(fld_d, "fld.d", LD, 0x2b800000) +INST(fst_s, "fst.s", ST, 0x2b400000) +INST(fst_d, "fst.d", ST, 0x2bc00000) // clang-format on /*****************************************************************************/ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b6a490b89742b..769e5fcbdcaa2 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -548,13 +548,6 @@ CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSave #endif // defined(TARGET_ARM64) #if defined(TARGET_LOONGARCH64) -// JitSaveFpRaWithCalleeSavedRegisters: -// 0: use default frame type decision -// 1: disable frames that save FP/RA registers with the callee-saved registers (at the top of the frame) -// 2: force all frames to use the frame types that save FP/RA registers with the callee-saved registers (at the top -// of the frame) -CONFIG_INTEGER(JitSaveFpRaWithCalleeSavedRegisters, W("JitSaveFpRaWithCalleeSavedRegisters"), 0) - // Disable emitDispIns by default CONFIG_INTEGER(JitDispIns, W("JitDispIns"), 0) #endif // 
defined(TARGET_LOONGARCH64) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c4d14d2f2006d..c522b12f21683 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5359,21 +5359,14 @@ void Compiler::lvaFixVirtualFrameOffsets() // We set FP to be after LR, FP delta += 2 * REGSIZE_BYTES; } -#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) +#elif defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) else { // FP is used. JITDUMP("--- delta bump %d for FP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); } -#elif defined(TARGET_LOONGARCH64) - else - { - // FP is used. - JITDUMP("--- delta bump %d for RBP frame\n", codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta()); - delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta(); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 if (opts.IsOSR()) { @@ -5487,23 +5480,11 @@ void Compiler::lvaFixVirtualFrameOffsets() #endif // FEATURE_FIXED_OUT_ARGS -#ifdef TARGET_ARM64 - // We normally add alignment below the locals between them and the outgoing - // arg space area. When we store fp/lr at the bottom, however, this will be - // below the alignment. So we should not apply the alignment adjustment to - // them. On ARM64 it turns out we always store these at +0 and +8 of the FP, - // so instead of dealing with skipping adjustment just for them we just set - // them here always. - assert(codeGen->isFramePointerUsed()); - if (lvaRetAddrVar != BAD_VAR_NUM) - { - lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); - } -#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // We normally add alignment below the locals between them and the outgoing - // arg space area. When we store fp/ra at the bottom, however, this will be - // below the alignment. 
So we should not apply the alignment adjustment to - // them. On LOONGARCH64 it turns out we always store these at +0 and +8 of the FP, + // arg space area. When we store fp/lr(ra) at the bottom, however, this will + // be below the alignment. So we should not apply the alignment adjustment to + // them. It turns out we always store these at +0 and +8 of the FP, // so instead of dealing with skipping adjustment just for them we just set // them here always. assert(codeGen->isFramePointerUsed()); @@ -5511,7 +5492,7 @@ void Compiler::lvaFixVirtualFrameOffsets() { lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES); } -#endif +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef TARGET_ARM @@ -6018,16 +5999,8 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #elif defined(TARGET_LOONGARCH64) -// if (compFeatureArgSplit() && this->info.compIsVarArgs) -//{//TODO: should confirm for "info.compIsVarArgs". -// if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) -// { -// // This is a split struct. It will account for an extra (8 bytes) -// // of alignment. -// varDsc->lvStkOffs += TARGET_POINTER_SIZE; -// argOffs += TARGET_POINTER_SIZE; -// } -//} +// empty for LoongArch64. + #else // TARGET* #error Unsupported or unset target architecture #endif // TARGET* @@ -6267,30 +6240,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif -#ifdef TARGET_LOONGARCH64 - // Decide where to save FP and RA registers. We store FP/RA registers at the bottom of the frame if there is - // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we - // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value, - // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the - // frame types. 
Since saving FP/RA at high addresses is a relatively rare case, force using it during stress. - // (It should be legal to use these frame types for every frame). - - if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 0) - { - // Default configuration - codeGen->SetSaveFpRaWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) || - compStressCompile(STRESS_GENERIC_VARN, 20)); - } - else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 1) - { - codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(false); // Disable using new frames - } - else if (opts.compJitSaveFpRaWithCalleeSavedRegisters == 2) - { - codeGen->SetSaveFpRaWithAllCalleeSavedRegisters(true); // Force using new frames - } -#endif // TARGET_LOONGARCH64 - int preSpillSize = 0; bool mustDoubleAlign = false; @@ -6342,17 +6291,10 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES; stkOffs -= initialStkOffs; } - if (codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() || - !isFramePointerUsed()) // Note that currently we always have a frame pointer - { - stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES; - } - else - { - // Subtract off FP and RA. - assert(compCalleeRegsPushed >= 2); - stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; - } + + // Subtract off FP and RA. 
+ assert(compCalleeRegsPushed >= 2); + stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; #else // !TARGET_LOONGARCH64 #ifdef TARGET_ARM @@ -6451,7 +6393,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_AMD64 -#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARMARCH) +#if defined(FEATURE_EH_FUNCLETS) && (defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)) if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including @@ -6460,18 +6402,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } -#endif // FEATURE_EH_FUNCLETS && defined(TARGET_ARMARCH) - -#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_LOONGARCH64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // If we need a PSPSym, allocate it first, before anything else, including - // padding (so we can avoid computing the same padding in the funclet - // frame). Note that there is no special padding requirement for the PSPSym. - noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); - } -#endif // FEATURE_EH_FUNCLETS || TARGET_LOONGARCH64 +#endif // FEATURE_EH_FUNCLETS && (TARGET_ARMARCH || TARGET_LOONGARCH64) if (mustDoubleAlign) { @@ -6866,7 +6797,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() continue; } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) { // Stack offset to varargs (parameters) should point to home area which will be preallocated. 
@@ -6888,16 +6819,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif -#ifdef TARGET_LOONGARCH64 - if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) - { // TODO: add VarArgs for LOONGARCH64. - // Stack offset to parameters should point to home area which will be preallocated. - varDsc->SetStackOffset(-initialStkOffs + - genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES); - continue; - } -#endif - #endif // !TARGET_AMD64 } @@ -6980,7 +6901,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // If we have an incoming register argument that has a struct promoted field // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // @@ -7006,20 +6927,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() lvaTable[fieldVarNum + 1].SetStackOffset(varDsc->GetStackOffset() + 4); } #endif // TARGET_ARM -#endif // TARGET_ARM64 - -#ifdef TARGET_LOONGARCH64 - // If we have an incoming register argument that has a struct promoted field - // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar - // - if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct()) - { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - lvaTable[fieldVarNum].SetStackOffset(varDsc->GetStackOffset()); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } } @@ -7124,13 +7032,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #endif // TARGET_ARM64 -#ifdef TARGET_LOONGARCH64 - if (!codeGen->IsSaveFpRaWithAllCalleeSavedRegisters() && - isFramePointerUsed()) // Note that currently we always have a frame pointer - { - // Create space for saving FP and RA. 
- stkOffs -= 2 * REGSIZE_BYTES; - } +#if defined(TARGET_LOONGARCH64) + assert(isFramePointerUsed()); // Note that currently we always have a frame pointer + stkOffs -= 2 * REGSIZE_BYTES; #endif // TARGET_LOONGARCH64 #if FEATURE_FIXED_OUT_ARGS @@ -7153,7 +7057,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (info.compIsVarArgs) { pushedCount += MAX_REG_ARG; @@ -7168,13 +7072,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() pushedCount += 1; // pushed PC (return address) #endif -#ifdef TARGET_LOONGARCH64 - if (info.compIsVarArgs) - { - pushedCount += MAX_REG_ARG; - } -#endif - noway_assert(compLclFrameSize + originalFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE))); } @@ -7343,9 +7240,9 @@ void Compiler::lvaAlignFrame() lvaIncrementFrameSize(REGSIZE_BYTES); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - // The stack on ARM64 must be 16 byte aligned. + // The stack on ARM64/LoongArch64 must be 16 byte aligned. // First, align up to 8. if ((compLclFrameSize % 8) != 0) @@ -7428,34 +7325,6 @@ void Compiler::lvaAlignFrame() } } -#elif defined(TARGET_LOONGARCH64) - - // First, align up to 8. - if ((compLclFrameSize % 8) != 0) - { - lvaIncrementFrameSize(8 - (compLclFrameSize % 8)); - } - else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) - { - // If we are not doing final layout, we don't know the exact value of compLclFrameSize - // and thus do not know how much we will need to add in order to be aligned. - // We add 8 so compLclFrameSize is still a multiple of 8. - lvaIncrementFrameSize(8); - } - assert((compLclFrameSize % 8) == 0); - - // Ensure that the stack is always 16-byte aligned by grabbing an unused 16-byte - // if needed. 
- bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) != 0; - bool lclFrameSizeAligned = (compLclFrameSize % 16) != 0; - - // If this isn't the final frame layout, assume we have to push an extra QWORD - // Just so the offsets are true upper limits. - if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned)) - { - lvaIncrementFrameSize(REGSIZE_BYTES); - } - #else NYI("TARGET specific lvaAlignFrame"); #endif // !TARGET_AMD64 @@ -8030,11 +7899,11 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) compCalleeRegsPushed = CNT_CALLEE_SAVED; -#if defined(TARGET_ARMARCH) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; - compCalleeRegsPushed++; // we always push LR. See genPushCalleeSavedRegisters + compCalleeRegsPushed++; // we always push LR/RA. See genPushCalleeSavedRegisters #elif defined(TARGET_AMD64) if (compFloatingPointUsed) { @@ -8044,11 +7913,6 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) { compCalleeFPRegsSavedMask = RBM_NONE; } -#elif defined(TARGET_LOONGARCH64) - if (compFloatingPointUsed) - compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT; - - compCalleeRegsPushed++; // we always push RA. See genPushCalleeSavedRegisters #endif #if DOUBLE_ALIGN @@ -8071,20 +7935,12 @@ unsigned Compiler::lvaFrameSize(FrameLayoutState curState) lvaAssignFrameOffsets(curState); unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ; -#if defined(TARGET_ARMARCH) - if (compFloatingPointUsed) - { - calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; - } - calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters -#endif - -#if defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (compFloatingPointUsed) { calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ; } - calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push RA. 
See genPushCalleeSavedRegisters + calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR/RA. See genPushCalleeSavedRegisters #endif result = compLclFrameSize + calleeSavedRegMaxSz; @@ -8396,20 +8252,13 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData* // Calculate padding unsigned padding = LCL_FLD_PADDING(lclNum); -#ifdef TARGET_ARMARCH - // We need to support alignment requirements to access memory on ARM ARCH - unsigned alignment = 1; - pComp->codeGen->InferOpSizeAlign(lcl, &alignment); - alignment = roundUp(alignment, TARGET_POINTER_SIZE); - padding = roundUp(padding, alignment); -#endif // TARGET_ARMARCH - -#ifdef TARGET_LOONGARCH64 +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) + // We need to support alignment requirements to access memory. unsigned alignment = 1; pComp->codeGen->InferOpSizeAlign(lcl, &alignment); alignment = roundUp(alignment, TARGET_POINTER_SIZE); padding = roundUp(padding, alignment); -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 // Change the variable to a TYP_BLK if (varType != TYP_BLK) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 859a9d3100676..8ea2160a5454a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1461,7 +1461,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) #endif // !defined(TARGET_64BIT) { -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) if (call->IsVarargs() || comp->opts.compUseSoftFP) { // For vararg call or on armel, reg args should be all integer. @@ -1472,32 +1472,8 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) type = newNode->TypeGet(); } } -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 -#if defined(TARGET_LOONGARCH64) - if (call->IsVarargs()) - { - // For vararg call, reg args should be all integer. - // Insert copies as needed to move float value to integer register. 
- GenTree* newNode = LowerFloatArg(ppArg, info); - if (newNode != nullptr) - { - type = newNode->TypeGet(); - } - } - else - { - GenTree* putArg = NewPutArg(call, arg, info, type); - - // In the case of register passable struct (in one or two registers) - // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) - // If an extra node is returned, splice it in the right place in the tree. - if (arg != putArg) - { - ReplaceArgWithPutArgOrBitcast(ppArg, putArg); - } - } -#else GenTree* putArg = NewPutArg(call, arg, info, type); // In the case of register passable struct (in one or two registers) @@ -1507,7 +1483,6 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) { ReplaceArgWithPutArgOrBitcast(ppArg, putArg); } -#endif } } diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 13d6cb3469dd9..9bd8beca15157 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -66,15 +66,13 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const switch (parentNode->OperGet()) { - case GT_ADD: - return ((-2048 <= immVal) && (immVal <= 2047)); - break; case GT_CMPXCHG: case GT_LOCKADD: case GT_XADD: - NYI_LOONGARCH64("unimplemented on LOONGARCH yet"); + NYI_LOONGARCH64("GT_CMPXCHG,GT_LOCKADD,GT_XADD"); break; + case GT_ADD: case GT_EQ: case GT_NE: case GT_LT: @@ -82,11 +80,11 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_GE: case GT_GT: case GT_BOUNDS_CHECK: - return ((-32768 <= immVal) && (immVal <= 32767)); + return emitter::isValidSimm12(immVal); case GT_AND: case GT_OR: case GT_XOR: - return ((-2048 <= immVal) && (immVal <= 2047)); + return emitter::isValidUimm11(immVal); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? 
(immVal == 0) : isPow2(immVal)); return true; @@ -411,11 +409,8 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon(); ssize_t offset = offsetNode->IconValue(); - // All integer load/store instructions on both ARM32 and ARM64 support - // offsets in range -255..255. Of course, this is a rather conservative - // check. For example, if the offset and size are a multiple of 8 we - // could allow a combined offset of up to 32760 on ARM64. - if ((offset < -255) || (offset > 255) || (offset + static_cast(size) > 256)) + // TODO-LoongArch64: not including the ldptr and SIMD offset which not used right now. + if (!emitter::isValidSimm12(offset) || !emitter::isValidSimm12(offset + static_cast(size))) { return; } @@ -514,7 +509,7 @@ void Lowering::LowerRotate(GenTree* tree) // void Lowering::LowerSIMD(GenTreeSIMD* simdNode) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerSIMD"); } #endif // FEATURE_SIMD @@ -527,7 +522,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsic"); } //---------------------------------------------------------------------------------------------- @@ -543,7 +538,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // This check may end up modifying node->gtOp1 if it is a cast node that can be removed bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("IsValidConstForMovImm"); return false; } @@ -556,7 +551,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsicCmpOp"); } 
//---------------------------------------------------------------------------------------------- @@ -567,7 +562,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsicCreate"); } //---------------------------------------------------------------------------------------------- @@ -578,7 +573,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("unimplemented on LoongArch64 yet"); + NYI_LOONGARCH64("LowerHWIntrinsicDot"); } #endif // FEATURE_HW_INTRINSICS @@ -598,7 +593,7 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // void Lowering::ContainCheckCallOperands(GenTreeCall* call) { - // There are no contained operands for LOONGARCH. + // There are no contained operands for LoongArch64. } //------------------------------------------------------------------------ @@ -640,18 +635,7 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) } #ifdef FEATURE_SIMD - assert(!"unimplemented on LOONGARCH yet"); - // If indirTree is of TYP_SIMD12, don't mark addr as contained - // so that it always get computed to a register. This would - // mean codegen side logic doesn't need to handle all possible - // addr expressions that could be contained. - // - // TODO-LOONGARCH64-CQ: handle other addr mode expressions that could be marked - // as contained. - if (indirNode->TypeGet() == TYP_SIMD12) - { - return; - } + NYI_LOONGARCH64("ContainCheckIndir-SIMD"); #endif // FEATURE_SIMD GenTree* addr = indirNode->Addr(); @@ -790,6 +774,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const // void Lowering::ContainCheckCast(GenTreeCast* node) { + // There are no contained operands for LoongArch64. 
} //------------------------------------------------------------------------ @@ -827,7 +812,7 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { - NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + NYI_LOONGARCH64("ContainCheckSIMD"); } #endif // FEATURE_SIMD @@ -840,7 +825,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - NYI_LOONGARCH64("-----unimplemented on LOONGARCH64 yet----"); + NYI_LOONGARCH64("ContainCheckHWIntrinsic"); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index b95293dcacf7c..39096cec86af8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1869,23 +1869,10 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) addrNode = arg; #if FEATURE_MULTIREG_ARGS -#ifdef TARGET_ARM64 - assert(varTypeIsStruct(type)); - if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) - { - // We will create a GT_OBJ for the argument below. - // This will be passed by value in two registers. - assert(addrNode != nullptr); - - // Create an Obj of the temp to use it as a call argument. - arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); - } -#elif defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(varTypeIsStruct(type)); if (lvaIsMultiregStruct(varDsc, curArgTabEntry->IsVararg())) { - // ToDo-LOONGARCH64: Consider using: arg->ChangeOper(GT_LCL_FLD); - // as that is how UNIX_AMD64_ABI works. // We will create a GT_OBJ for the argument below. // This will be passed by value in two registers. assert(addrNode != nullptr); @@ -1896,7 +1883,7 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) #else // Always create an Obj of the temp to use it as a call argument. 
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); -#endif // !TARGET_ARM64 +#endif // !(TARGET_ARM64 || TARGET_LOONGARCH64) #endif // FEATURE_MULTIREG_ARGS } @@ -2939,7 +2926,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) assert(!callIsVararg || !isHfaArg); passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)); @@ -2950,17 +2937,15 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #elif defined(TARGET_X86) - passUsingFloatRegs = false; - -#elif defined(TARGET_LOONGARCH64) - assert(!callIsVararg); - assert(!isHfaArg); - passUsingFloatRegs = !callIsVararg && varTypeIsFloating(argx); + passUsingFloatRegs = false; #else #error Unsupported or unset target architecture #endif // TARGET* +#if defined(TARGET_LOONGARCH64) + DWORD floatFieldFlags = 0; +#endif bool isBackFilled = false; unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; @@ -3027,7 +3012,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } #endif // UNIX_AMD64_ABI -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) if (isStructArg) { if (isHfaArg) @@ -3073,27 +3058,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) size = genTypeStSz(argx->gtType); byteSize = genTypeSize(argx); } -#elif defined(TARGET_LOONGARCH64) - DWORD floatFieldFlags = 0; - if (!isStructArg) - { - size = 1; - byteSize = genTypeSize(argx); - } - else - { - // Structs are either passed in 1 or 2 (64-bit) slots. - // Structs that are the size of 2 pointers are passed by value in multiple registers, - // if sufficient registers are available. - // Structs that are larger than 2 pointers are passed by reference (to a copy). 
- size = (unsigned)(roundUp(structSize, TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - - if (size > 2) - { - size = 1; - } - byteSize = structSize; - } #else #error Unsupported or unset target architecture #endif // TARGET_XXX @@ -3117,9 +3081,13 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) compFloatingPointUsed |= passUsingFloatRegs; if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) + { size = 1; + } else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + { size = 2; + } } else // if (passStructByRef) { @@ -3284,7 +3252,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) && passUsingFloatRegs) + { passUsingFloatRegs = isRegArg = intArgRegNum < maxRegArgs; + } if (!passUsingFloatRegs) { @@ -3294,7 +3264,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) else if (passUsingFloatRegs) { if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + { nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); + } else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) { assert(size == 1); @@ -3320,7 +3292,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Check if the last register needed is still in the int argument register range. isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; if (!passUsingFloatRegs && isRegArg && (size > 1)) + { nextOtherRegNum = genMapIntRegArgNumToRegNum(intArgRegNum + 1); + } // Did we run out of registers when we had a 16-byte struct (size===2) ? 
// (i.e we only have one register remaining but we needed two registers to pass this arg) @@ -3502,23 +3476,20 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { if ((size > 1) && ((intArgRegNum + 1) == maxRegArgs) && (nextOtherRegNum == REG_STK)) { -#if FEATURE_ARG_SPLIT - // This indicates a partial enregistration of a struct type - assert((isStructArg) || argx->OperIs(GT_FIELD_LIST) || argx->OperIsCopyBlkOp() || - (argx->gtOper == GT_COMMA && (argx->gtFlags & GTF_ASG))); - call->fgArgInfo->SplitArg(argIndex, 1, 1); -#endif // FEATURE_ARG_SPLIT assert(!passUsingFloatRegs); assert(size == 2); - // assert(nextOtherRegNum == REG_STK); intArgRegNum = maxRegArgs; } else if ((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) == 0x0) { if (passUsingFloatRegs) + { fltArgRegNum += 1; + } else + { intArgRegNum += size; + } } else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0) { @@ -4927,8 +4898,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else { - assert(!"----------------unimplemented type-case... on LOONGARCH"); - unreached(); + NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_LCL_FLD,GT_LCL_VAR"); + tmp_type_1 = TYP_UNDEF; + tmp_type_2 = TYP_UNDEF; } elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; @@ -5014,8 +4986,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else { - assert(!"----------------unimplemented type-case... on LOONGARCH"); - unreached(); + NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_OBJ struct"); + tmp_type_1 = TYP_UNDEF; + tmp_type_2 = TYP_UNDEF; } elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 
8 : 4; diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index 755dd28915684..1b5d1839b5e4c 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -48,7 +48,7 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) { assert(varTypeIsStruct(type)); nextReg(TYP_INT, 1); // TYP_BYREF - } // TODO:struct-float. + } else { nextReg(type, numRegs); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 536ef627d6062..e5bf31e7f66c2 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -92,7 +92,7 @@ inline bool compUnixX86Abi() // be assigned during register allocation. // REG_NA - Used to indicate that a register is either not yet assigned or not required. // -#if defined(TARGET_ARM) +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) enum _regNumber_enum : unsigned { #define REGDEF(name, rnum, mask, sname) REG_##name = rnum, @@ -177,27 +177,6 @@ enum _regMask_enum : unsigned #include "register.h" }; -#elif defined(TARGET_LOONGARCH64) - -enum _regNumber_enum : unsigned -{ -#define REGDEF(name, rnum, mask, sname) REG_##name = rnum, -#define REGALIAS(alias, realname) REG_##alias = REG_##realname, -#include "register.h" - - REG_COUNT, - REG_NA = REG_COUNT, - ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) -}; - -enum _regMask_enum : unsigned __int64 -{ - RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, -#include "register.h" -}; - #else #error Unsupported target architecture #endif diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index daf251b33477d..25355994d385b 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -8,7 +8,7 @@ // NOTE for LoongArch64: // The `REG_R21` which alias `REG_X0` is specially reserved 
!!! -// It can be used only by manully and should be very careful!!! +// It can be used only manully and very carefully!!! // clang-format off #define CPU_LOAD_STORE_ARCH 1 diff --git a/src/coreclr/jit/unwind.h b/src/coreclr/jit/unwind.h index bb93348cc2fdd..ae9a19a4b37f3 100644 --- a/src/coreclr/jit/unwind.h +++ b/src/coreclr/jit/unwind.h @@ -10,7 +10,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -////TODO for LOONGARCH64: should seperately define for loongarch64. #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) // Windows no longer imposes a maximum prolog size. However, we still have an @@ -138,11 +137,9 @@ class UnwindCodesBase { #if defined(TARGET_ARM) return b >= 0xFD; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code? -#elif defined(TARGET_LOONGARCH64) - return (b == UWC_END); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 } #ifdef DEBUG @@ -875,4 +872,4 @@ void DumpUnwindInfo(Compiler* comp, #endif // DEBUG -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 From 66e495356b20e39c78908f9ba45365aa2994a45c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sat, 19 Mar 2022 21:46:47 +0800 Subject: [PATCH 37/46] [LoongArch64] amend the output format of `emitDisInsName`. 
--- src/coreclr/jit/emitloongarch64.cpp | 849 ++++++++++++++-------------- src/coreclr/jit/emitloongarch64.h | 3 +- 2 files changed, 431 insertions(+), 421 deletions(-) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 098227a7ce230..a3f2a226f8923 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2679,15 +2679,15 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t return callInstrSize; } -/***************************************************************************** - * LoongArch64 has an individual implementation for emitJumpDistBind(). - * - * Bind targets of relative jumps/branch to choose the smallest possible encoding. - * LoongArch64 has a small medium, and large encoding. - * - * Even though the small encoding is offset-18bits which lowest 2bits is always 0. - * The small encoding as the default is fit for most cases. - */ +//---------------------------------------------------------------------------------- +// LoongArch64 has an individual implementation for emitJumpDistBind(). +// +// Bind targets of relative jumps/branch to choose the smallest possible encoding. +// LoongArch64 has a small medium, and large encoding. +// +// Even though the small encoding is offset-18bits which lowest 2bits is always 0. +// The small encoding as the default is fit for most cases. 
+// void emitter::emitJumpDistBind() { @@ -2836,15 +2836,6 @@ void emitter::emitJumpDistBind() /* First time we've seen this label, convert its target */ CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef DEBUG - if (EMITVERBOSE) - { - printf("Binding: "); - emitDispIns(jmp, false, false, false); - printf("Binding L_M%03u_" FMT_BB, emitComp->compMethodID, jmp->idAddr()->iiaBBlabel->bbNum); - } -#endif // DEBUG - tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel); #ifdef DEBUG @@ -3856,7 +3847,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitDisInsName(*cp, (BYTE*)cp, id); cp++; } - // emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); } if (emitComp->compDebugBreak) @@ -3892,26 +3882,41 @@ static const char* const RegNames[] = }; // clang-format on -/**************************************************************************** - * - * Display the given instruction. - */ +//---------------------------------------------------------------------------------------- +// Disassemble the given instruction. +// The `emitter::emitDisInsName` is focused on the most important for debugging. +// So it implemented as far as simply and independently which is very useful for +// porting easily to the release mode. +// +// Arguments: +// code - The instruction's encoding. +// addr - The address of the code. +// id - The instrDesc of the code if needed. +// +// Note: +// The length of the instruction's name include aligned space is 13. +// -// NOTE: At least 32bytes within dst. 
-void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) +void emitter::emitDisInsName(code_t code, const BYTE* addr, instrDesc* id) { - const BYTE* insstrs = dst; + const BYTE* insAdr = addr; + const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; - if (!code) + unsigned int opcode = (code >> 26) & 0x3f; + + bool disOpcode = !emitComp->opts.disDiffable; + bool disAddr = emitComp->opts.disAddr; + if (disAddr) { - printf("LOONGARCH invalid instruction: 0x%x\n", code); - assert(!"invalid inscode on LOONGARCH!"); - return; + printf(" 0x%llx", insAdr); } - const char* const CFregName[] = {"fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7"}; + printf(" "); - unsigned int opcode = (code >> 26) & 0x3f; + if (disOpcode) + { + printf("%08X ", code); + } // bits: 31-26,MSB6 switch (opcode) @@ -3919,29 +3924,25 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) case 0x0: { goto Label_OPCODE_0; - // break; } case 0x2: { goto Label_OPCODE_2; - // break; } case 0x3: { goto Label_OPCODE_3; - // break; } case 0xe: { goto Label_OPCODE_E; - // break; } case LA_2RI16_ADDU16I_D: // 0x4 { const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; short si16 = (code >> 10) & 0xffff; - printf(" 0x%llx addu16i.d %s, %s, %d\n", insstrs, rd, rj, si16); + printf("addu16i.d %s, %s, %d\n", rd, rj, si16); return; } case 0x5: @@ -3955,27 +3956,27 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode) { case LA_1RI20_LU12I_W: - printf(" 0x%llx lu12i.w %s, 0x%x\n", insstrs, rd, si20); + printf("lu12i.w %s, 0x%x\n", rd, si20); return; case LA_1RI20_LU32I_D: - printf(" 0x%llx lu32i.d %s, 0x%x\n", insstrs, rd, si20); + printf("lu32i.d %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCADDI: - printf(" 0x%llx pcaddi %s, 0x%x\n", insstrs, rd, si20); + printf("pcaddi %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCALAU12I: - printf(" 0x%llx 
pcalau12i %s, 0x%x\n", insstrs, rd, si20); + printf("pcalau12i %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCADDU12I: - printf(" 0x%llx pcaddu12i %s, 0x%x\n", insstrs, rd, si20); + printf("pcaddu12i %s, 0x%x\n", rd, si20); return; case LA_1RI20_PCADDU18I: { - printf(" 0x%llx pcaddu18i %s, 0x%x\n", insstrs, rd, si20); + printf("pcaddu18i %s, 0x%x\n", rd, si20); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -3992,31 +3993,31 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode) { case LA_2RI14_LL_W: - printf(" 0x%llx ll.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ll.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_SC_W: - printf(" 0x%llx sc.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("sc.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_LL_D: - printf(" 0x%llx ll.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ll.d %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_SC_D: - printf(" 0x%llx sc.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("sc.d %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_LDPTR_W: - printf(" 0x%llx ldptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ldptr.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_STPTR_W: - printf(" 0x%llx stptr.w %s, %s, %d\n", insstrs, rd, rj, si14); + printf("stptr.w %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_LDPTR_D: - printf(" 0x%llx ldptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("ldptr.d %s, %s, %d\n", rd, rj, si14); return; case LA_2RI14_STPTR_D: - printf(" 0x%llx stptr.d %s, %s, %d\n", insstrs, rd, rj, si14); + printf("stptr.d %s, %s, %d\n", rd, rj, si14); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4033,55 +4034,55 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch 
(inscode) { case LA_2RI12_LD_B: - printf(" 0x%llx ld.b %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.b %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_H: - printf(" 0x%llx ld.h %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.h %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_W: - printf(" 0x%llx ld.w %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.w %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_D: - printf(" 0x%llx ld.d %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.d %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_B: - printf(" 0x%llx st.b %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.b %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_H: - printf(" 0x%llx st.h %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.h %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_W: - printf(" 0x%llx st.w %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.w %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_ST_D: - printf(" 0x%llx st.d %s, %s, %d\n", insstrs, rd, rj, si12); + printf("st.d %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_BU: - printf(" 0x%llx ld.bu %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.bu %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_HU: - printf(" 0x%llx ld.hu %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.hu %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_LD_WU: - printf(" 0x%llx ld.wu %s, %s, %d\n", insstrs, rd, rj, si12); + printf("ld.wu %s, %s, %d\n", rd, rj, si12); return; case LA_2RI12_PRELD: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_2RI12_PRELD"); return; case LA_2RI12_FLD_S: - printf(" 0x%llx fld.s %s, %s, %d\n", insstrs, fd, rj, si12); + printf("fld.s %s, %s, %d\n", fd, rj, si12); return; case LA_2RI12_FST_S: - printf(" 0x%llx fst.s %s, %s, %d\n", insstrs, fd, rj, si12); + printf("fst.s %s, %s, %d\n", fd, rj, si12); return; case LA_2RI12_FLD_D: - printf(" 0x%llx fld.d %s, %s, %d\n", insstrs, fd, rj, si12); + 
printf("fld.d %s, %s, %d\n", fd, rj, si12); return; case LA_2RI12_FST_D: - printf(" 0x%llx fst.d %s, %s, %d\n", insstrs, fd, rj, si12); + printf("fst.d %s, %s, %d\n", fd, rj, si12); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4091,7 +4092,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; - printf(" 0x%llx beqz %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + printf("beqz %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); return; } case LA_1RI21_BNEZ: // 0x11 @@ -4099,7 +4100,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs21 = (((code >> 10) & 0xffff) | ((code & 0x1f) << 16)) << 11; offs21 >>= 9; - printf(" 0x%llx bnez %s, 0x%llx\n", insstrs, rj, (int64_t)insstrs + offs21); + printf("bnez %s, 0x%llx\n", rj, (int64_t)insAdr + offs21); return; } case 0x12: @@ -4111,17 +4112,17 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) offs21 >>= 9; if (0 == ((code >> 8) & 0x3)) { - printf(" 0x%llx bceqz %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + printf("bceqz %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); return; } else if (1 == ((code >> 8) & 0x3)) { - printf(" 0x%llx bcnez %s, 0x%llx\n", insstrs, cj, (int64_t)insstrs + offs21); + printf("bcnez %s, 0x%llx\n", cj, (int64_t)insAdr + offs21); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4137,11 +4138,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) assert(0 < id->idDebugOnlyInfo()->idMemCookie); const char* methodName; methodName = 
emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); - printf(" 0x%llx jirl %s, %s, %d #%s\n", insstrs, rd, rj, offs16, methodName); + printf("jirl %s, %s, %d #%s\n", rd, rj, offs16, methodName); } else { - printf(" 0x%llx jirl %s, %s, %d\n", insstrs, rd, rj, offs16); + printf("jirl %s, %s, %d\n", rd, rj, offs16); } return; } @@ -4149,14 +4150,14 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; - printf(" 0x%llx b 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + printf("b 0x%llx\n", (int64_t)insAdr + offs26); return; } case LA_I26_BL: // 0x15 { int offs26 = (((code >> 10) & 0xffff) | ((code & 0x3ff) << 16)) << 6; offs26 >>= 4; - printf(" 0x%llx bl 0x%llx\n", insstrs, (int64_t)insstrs + offs26); + printf("bl 0x%llx\n", (int64_t)insAdr + offs26); return; } case LA_2RI16_BEQ: // 0x16 @@ -4165,7 +4166,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx beq %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("beq %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BNE: // 0x17 @@ -4174,7 +4175,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bne %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bne %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BLT: // 0x18 @@ -4183,7 +4184,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx blt %s, %s, 0x%llx\n", insstrs, rj, rd, 
(int64_t)insstrs + offs16); + printf("blt %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BGE: // 0x19 @@ -4192,7 +4193,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bge %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bge %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BLTU: // 0x1a @@ -4201,7 +4202,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bltu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bltu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } case LA_2RI16_BGEU: // 0x1b @@ -4210,12 +4211,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; int offs16 = (short)((code >> 10) & 0xffff); offs16 <<= 2; - printf(" 0x%llx bgeu %s, %s, 0x%llx\n", insstrs, rj, rd, (int64_t)insstrs + offs16); + printf("bgeu %s, %s, 0x%llx\n", rj, rd, (int64_t)insAdr + offs16); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -4246,80 +4247,80 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode3) { case LA_2R_CLO_W: - printf(" 0x%llx clo.w %s, %s\n", insstrs, rd, rj); + printf("clo.w %s, %s\n", rd, rj); return; case LA_2R_CLZ_W: - printf(" 0x%llx clz.w %s, %s\n", insstrs, rd, rj); + printf("clz.w %s, %s\n", rd, rj); return; case LA_2R_CTO_W: - printf(" 0x%llx cto.w %s, %s\n", insstrs, rd, rj); + printf("cto.w %s, %s\n", rd, rj); return; case LA_2R_CTZ_W: - printf(" 0x%llx ctz.w %s, %s\n", insstrs, rd, rj); + printf("ctz.w %s, %s\n", rd, rj); 
return; case LA_2R_CLO_D: - printf(" 0x%llx clo.d %s, %s\n", insstrs, rd, rj); + printf("clo.d %s, %s\n", rd, rj); return; case LA_2R_CLZ_D: - printf(" 0x%llx clz.d %s, %s\n", insstrs, rd, rj); + printf("clz.d %s, %s\n", rd, rj); return; case LA_2R_CTO_D: - printf(" 0x%llx cto.d %s, %s\n", insstrs, rd, rj); + printf("cto.d %s, %s\n", rd, rj); return; case LA_2R_CTZ_D: - printf(" 0x%llx ctz.d %s, %s\n", insstrs, rd, rj); + printf("ctz.d %s, %s\n", rd, rj); return; case LA_2R_REVB_2H: - printf(" 0x%llx revb.2h %s, %s\n", insstrs, rd, rj); + printf("revb.2h %s, %s\n", rd, rj); return; case LA_2R_REVB_4H: - printf(" 0x%llx revb.4h %s, %s\n", insstrs, rd, rj); + printf("revb.4h %s, %s\n", rd, rj); return; case LA_2R_REVB_2W: - printf(" 0x%llx revb.2w %s, %s\n", insstrs, rd, rj); + printf("revb.2w %s, %s\n", rd, rj); return; case LA_2R_REVB_D: - printf(" 0x%llx revb.d %s, %s\n", insstrs, rd, rj); + printf("revb.d %s, %s\n", rd, rj); return; case LA_2R_REVH_2W: - printf(" 0x%llx revh.2w %s, %s\n", insstrs, rd, rj); + printf("revh.2w %s, %s\n", rd, rj); return; case LA_2R_REVH_D: - printf(" 0x%llx revh.d %s, %s\n", insstrs, rd, rj); + printf("revh.d %s, %s\n", rd, rj); return; case LA_2R_BITREV_4B: - printf(" 0x%llx bitrev.4b %s, %s\n", insstrs, rd, rj); + printf("bitrev.4b %s, %s\n", rd, rj); return; case LA_2R_BITREV_8B: - printf(" 0x%llx bitrev.8b %s, %s\n", insstrs, rd, rj); + printf("bitrev.8b %s, %s\n", rd, rj); return; case LA_2R_BITREV_W: - printf(" 0x%llx bitrev.w %s, %s\n", insstrs, rd, rj); + printf("bitrev.w %s, %s\n", rd, rj); return; case LA_2R_BITREV_D: - printf(" 0x%llx bitrev.d %s, %s\n", insstrs, rd, rj); + printf("bitrev.d %s, %s\n", rd, rj); return; case LA_2R_EXT_W_H: - printf(" 0x%llx ext.w.h %s, %s\n", insstrs, rd, rj); + printf("ext.w.h %s, %s\n", rd, rj); return; case LA_2R_EXT_W_B: - printf(" 0x%llx ext.w.b %s, %s\n", insstrs, rd, rj); + printf("ext.w.b %s, %s\n", rd, rj); return; case LA_2R_RDTIMEL_W: - printf(" 0x%llx rdtimel.w %s, %s\n", 
insstrs, rd, rj); + printf("rdtimel.w %s, %s\n", rd, rj); return; case LA_2R_RDTIMEH_W: - printf(" 0x%llx rdtimeh.w %s, %s\n", insstrs, rd, rj); + printf("rdtimeh.w %s, %s\n", rd, rj); return; case LA_2R_RDTIME_D: - printf(" 0x%llx rdtime.d %s, %s\n", insstrs, rd, rj); + printf("rdtime.d %s, %s\n", rd, rj); return; case LA_2R_CPUCFG: - printf(" 0x%llx cpucfg %s, %s\n", insstrs, rd, rj); + printf("cpucfg %s, %s\n", rd, rj); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4328,18 +4329,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) { const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx asrtle.d %s, %s\n", insstrs, rj, rk); + printf("asrtle.d %s, %s\n", rj, rk); return; } case LA_2R_ASRTGT_D: { const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx asrtgt.d %s, %s\n", insstrs, rj, rk); + printf("asrtgt.d %s, %s\n", rj, rk); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4354,17 +4355,17 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) unsigned int sa2 = (code >> 15) & 0x3; if (0 == ((code >> 17) & 0x1)) { - printf(" 0x%llx alsl.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); + printf("alsl.w %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); return; } else if (1 == ((code >> 17) & 0x1)) { - printf(" 0x%llx alsl.wu %s, %s, %s, %d\n", insstrs, rd, rj, rk, (sa2 + 1)); + printf("alsl.wu %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4375,7 +4376,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj 
= RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; - printf(" 0x%llx bytepick.w %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa2); + printf("bytepick.w %s, %s, %s, %d\n", rd, rj, rk, sa2); return; } case LA_OP_BYTEPICK_D: // 0x3 @@ -4384,7 +4385,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa3 = (code >> 15) & 0x7; - printf(" 0x%llx bytepick.d %s, %s, %s, %d\n", insstrs, rd, rj, rk, sa3); + printf("bytepick.d %s, %s, %s, %d\n", rd, rj, rk, sa3); return; } case 0x4: @@ -4403,145 +4404,145 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode2) { case LA_3R_ADD_W: - printf(" 0x%llx add.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("add.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ADD_D: - printf(" 0x%llx add.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("add.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SUB_W: - printf(" 0x%llx sub.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sub.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SUB_D: - printf(" 0x%llx sub.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sub.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLT: - printf(" 0x%llx slt %s, %s, %s\n", insstrs, rd, rj, rk); + printf("slt %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLTU: - printf(" 0x%llx sltu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sltu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MASKEQZ: - printf(" 0x%llx maskeqz %s, %s, %s\n", insstrs, rd, rj, rk); + printf("maskeqz %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MASKNEZ: - printf(" 0x%llx masknez %s, %s, %s\n", insstrs, rd, rj, rk); + printf("masknez %s, %s, %s\n", rd, rj, rk); return; case LA_3R_NOR: - printf(" 0x%llx nor %s, %s, %s\n", insstrs, rd, rj, rk); + printf("nor %s, %s, %s\n", rd, rj, rk); return; case LA_3R_AND: - printf(" 
0x%llx and %s, %s, %s\n", insstrs, rd, rj, rk); + printf("and %s, %s, %s\n", rd, rj, rk); return; case LA_3R_OR: - printf(" 0x%llx or %s, %s, %s\n", insstrs, rd, rj, rk); + printf("or %s, %s, %s\n", rd, rj, rk); return; case LA_3R_XOR: - printf(" 0x%llx xor %s, %s, %s\n", insstrs, rd, rj, rk); + printf("xor %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ORN: - printf(" 0x%llx orn %s, %s, %s\n", insstrs, rd, rj, rk); + printf("orn %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ANDN: - printf(" 0x%llx andn %s, %s, %s\n", insstrs, rd, rj, rk); + printf("andn %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLL_W: - printf(" 0x%llx sll.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sll.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRL_W: - printf(" 0x%llx srl.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("srl.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRA_W: - printf(" 0x%llx sra.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sra.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SLL_D: - printf(" 0x%llx sll.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sll.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRL_D: - printf(" 0x%llx srl.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("srl.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_SRA_D: - printf(" 0x%llx sra.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("sra.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ROTR_W: - printf(" 0x%llx rotr.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("rotr.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_ROTR_D: - printf(" 0x%llx rotr.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("rotr.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MUL_W: - printf(" 0x%llx mul.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mul.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_W: - printf(" 0x%llx mulh.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulh.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_WU: - printf(" 0x%llx mulh.wu %s, %s, %s\n", insstrs, rd, rj, rk); 
+ printf("mulh.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MUL_D: - printf(" 0x%llx mul.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mul.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_D: - printf(" 0x%llx mulh.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulh.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULH_DU: - printf(" 0x%llx mulh.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulh.du %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULW_D_W: - printf(" 0x%llx mulw.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulw.d.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MULW_D_WU: - printf(" 0x%llx mulw.d.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mulw.d.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_W: - printf(" 0x%llx div.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_W: - printf(" 0x%llx mod.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_WU: - printf(" 0x%llx div.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_WU: - printf(" 0x%llx mod.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.wu %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_D: - printf(" 0x%llx div.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_D: - printf(" 0x%llx mod.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.d %s, %s, %s\n", rd, rj, rk); return; case LA_3R_DIV_DU: - printf(" 0x%llx div.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("div.du %s, %s, %s\n", rd, rj, rk); return; case LA_3R_MOD_DU: - printf(" 0x%llx mod.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("mod.du %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_B_W: - printf(" 0x%llx crc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crc.w.b.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_H_W: - printf(" 0x%llx crc.w.h.w %s, %s, 
%s\n", insstrs, rd, rj, rk); + printf("crc.w.h.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_W_W: - printf(" 0x%llx crc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crc.w.w.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRC_W_D_W: - printf(" 0x%llx crc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crc.w.d.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_B_W: - printf(" 0x%llx crcc.w.b.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.b.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_H_W: - printf(" 0x%llx crcc.w.h.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.h.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_W_W: - printf(" 0x%llx crcc.w.w.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.w.w %s, %s, %s\n", rd, rj, rk); return; case LA_3R_CRCC_W_D_W: - printf(" 0x%llx crcc.w.d.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("crcc.w.d.w %s, %s, %s\n", rd, rj, rk); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -4553,16 +4554,16 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode2) { case LA_OP_BREAK: - printf(" 0x%llx break 0x%x\n", insstrs, codefield); + printf("break 0x%x\n", codefield); return; case LA_OP_DBGCALL: - printf(" 0x%llx dbgcall 0x%x\n", insstrs, codefield); + printf("dbgcall 0x%x\n", codefield); return; case LA_OP_SYSCALL: - printf(" 0x%llx syscall 0x%x\n", insstrs, codefield); + printf("syscall 0x%x\n", codefield); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -4572,11 +4573,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; unsigned int sa2 = (code >> 15) & 0x3; - printf(" 0x%llx alsl.d %s, %s, %s, %d\n", insstrs, rd, 
rj, rk, (sa2 + 1)); + printf("alsl.d %s, %s, %s, %d\n", rd, rj, rk, (sa2 + 1)); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4593,17 +4594,17 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) unsigned int msbw = (code >> 16) & 0x1f; if (!(code & 0x8000)) { - printf(" 0x%llx bstrins.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + printf("bstrins.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); return; } else if (code & 0x8000) { - printf(" 0x%llx bstrpick.w %s, %s, %d, %d\n", insstrs, rd, rj, msbw, lsbw); + printf("bstrpick.w %s, %s, %d, %d\n", rd, rj, msbw, lsbw); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -4622,18 +4623,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx slli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("slli.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx slli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("slli.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4647,18 +4648,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx srli.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("srli.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx srli.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("srli.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH 
illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4672,18 +4673,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx srai.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("srai.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx srai.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("srai.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4697,24 +4698,24 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) if (1 == ((code >> 15) & 0x7)) { unsigned int ui5 = (code >> 10) & 0x1f; - printf(" 0x%llx rotri.w %s, %s, %d\n", insstrs, rd, rj, ui5); + printf("rotri.w %s, %s, %d\n", rd, rj, ui5); return; } else if (1 == ((code >> 16) & 0x3)) { unsigned int ui6 = (code >> 10) & 0x3f; - printf(" 0x%llx rotri.d %s, %s, %d\n", insstrs, rd, rj, ui6); + printf("rotri.d %s, %s, %d\n", rd, rj, ui6); return; } else { - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -4727,7 +4728,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; - printf(" 0x%llx bstrins.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + printf("bstrins.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); return; } case LA_OP_BSTRPICK_D: @@ -4736,7 +4737,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, 
instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int lsbd = (code >> 10) & 0x3f; unsigned int msbd = (code >> 16) & 0x3f; - printf(" 0x%llx bstrpick.d %s, %s, %d, %d\n", insstrs, rd, rj, msbd, lsbd); + printf("bstrpick.d %s, %s, %d, %d\n", rd, rj, msbd, lsbd); return; } case 0x4: @@ -4752,64 +4753,64 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode1) { case LA_3R_FADD_S: - printf(" 0x%llx fadd.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fadd.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FADD_D: - printf(" 0x%llx fadd.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fadd.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSUB_S: - printf(" 0x%llx fsub.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fsub.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSUB_D: - printf(" 0x%llx fsub.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fsub.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMUL_S: - printf(" 0x%llx fmul.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmul.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMUL_D: - printf(" 0x%llx fmul.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmul.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FDIV_S: - printf(" 0x%llx fdiv.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fdiv.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FDIV_D: - printf(" 0x%llx fdiv.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fdiv.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMAX_S: - printf(" 0x%llx fmax.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmax.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMAX_D: - printf(" 0x%llx fmax.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmax.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMIN_S: - printf(" 0x%llx fmin.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmin.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMIN_D: - printf(" 0x%llx fmin.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmin.d %s, 
%s, %s\n", fd, fj, fk); return; case LA_3R_FMAXA_S: - printf(" 0x%llx fmaxa.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmaxa.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMAXA_D: - printf(" 0x%llx fmaxa.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmaxa.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMINA_S: - printf(" 0x%llx fmina.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmina.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FMINA_D: - printf(" 0x%llx fmina.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fmina.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSCALEB_S: - printf(" 0x%llx fscaleb.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fscaleb.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FSCALEB_D: - printf(" 0x%llx fscaleb.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fscaleb.d %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FCOPYSIGN_S: - printf(" 0x%llx fcopysign.s %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fcopysign.s %s, %s, %s\n", fd, fj, fk); return; case LA_3R_FCOPYSIGN_D: - printf(" 0x%llx fcopysign.d %s, %s, %s\n", insstrs, fd, fj, fk); + printf("fcopysign.d %s, %s, %s\n", fd, fj, fk); return; case 0x228: case 0x229: @@ -4825,194 +4826,194 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (inscode2) { case LA_2R_FABS_S: - printf(" 0x%llx fabs.s %s, %s\n", insstrs, fd, fj); + printf("fabs.s %s, %s\n", fd, fj); return; case LA_2R_FABS_D: - printf(" 0x%llx fabs.d %s, %s\n", insstrs, fd, fj); + printf("fabs.d %s, %s\n", fd, fj); return; case LA_2R_FNEG_S: - printf(" 0x%llx fneg.s %s, %s\n", insstrs, fd, fj); + printf("fneg.s %s, %s\n", fd, fj); return; case LA_2R_FNEG_D: - printf(" 0x%llx fneg.d %s, %s\n", insstrs, fd, fj); + printf("fneg.d %s, %s\n", fd, fj); return; case LA_2R_FLOGB_S: - printf(" 0x%llx flogb.s %s, %s\n", insstrs, fd, fj); + printf("flogb.s %s, %s\n", fd, fj); return; case LA_2R_FLOGB_D: - printf(" 0x%llx flogb.d %s, %s\n", insstrs, fd, fj); + printf("flogb.d %s, 
%s\n", fd, fj); return; case LA_2R_FCLASS_S: - printf(" 0x%llx fclass.s %s, %s\n", insstrs, fd, fj); + printf("fclass.s %s, %s\n", fd, fj); return; case LA_2R_FCLASS_D: - printf(" 0x%llx fclass.d %s, %s\n", insstrs, fd, fj); + printf("fclass.d %s, %s\n", fd, fj); return; case LA_2R_FSQRT_S: - printf(" 0x%llx fsqrt.s %s, %s\n", insstrs, fd, fj); + printf("fsqrt.s %s, %s\n", fd, fj); return; case LA_2R_FSQRT_D: - printf(" 0x%llx fsqrt.d %s, %s\n", insstrs, fd, fj); + printf("fsqrt.d %s, %s\n", fd, fj); return; case LA_2R_FRECIP_S: - printf(" 0x%llx frecip.s %s, %s\n", insstrs, fd, fj); + printf("frecip.s %s, %s\n", fd, fj); return; case LA_2R_FRECIP_D: - printf(" 0x%llx frecip.d %s, %s\n", insstrs, fd, fj); + printf("frecip.d %s, %s\n", fd, fj); return; case LA_2R_FRSQRT_S: - printf(" 0x%llx frsqrt.s %s, %s\n", insstrs, fd, fj); + printf("frsqrt.s %s, %s\n", fd, fj); return; case LA_2R_FRSQRT_D: - printf(" 0x%llx frsqrt.d %s, %s\n", insstrs, fd, fj); + printf("frsqrt.d %s, %s\n", fd, fj); return; case LA_2R_FMOV_S: - printf(" 0x%llx fmov.s %s, %s\n", insstrs, fd, fj); + printf("fmov.s %s, %s\n", fd, fj); return; case LA_2R_FMOV_D: - printf(" 0x%llx fmov.d %s, %s\n", insstrs, fd, fj); + printf("fmov.d %s, %s\n", fd, fj); return; case LA_2R_MOVGR2FR_W: - printf(" 0x%llx movgr2fr.w %s, %s\n", insstrs, fd, rj); + printf("movgr2fr.w %s, %s\n", fd, rj); return; case LA_2R_MOVGR2FR_D: - printf(" 0x%llx movgr2fr.d %s, %s\n", insstrs, fd, rj); + printf("movgr2fr.d %s, %s\n", fd, rj); return; case LA_2R_MOVGR2FRH_W: - printf(" 0x%llx movgr2frh.w %s, %s\n", insstrs, fd, rj); + printf("movgr2frh.w %s, %s\n", fd, rj); return; case LA_2R_MOVFR2GR_S: - printf(" 0x%llx movfr2gr.s %s, %s\n", insstrs, rd, fj); + printf("movfr2gr.s %s, %s\n", rd, fj); return; case LA_2R_MOVFR2GR_D: - printf(" 0x%llx movfr2gr.d %s, %s\n", insstrs, rd, fj); + printf("movfr2gr.d %s, %s\n", rd, fj); return; case LA_2R_MOVFRH2GR_S: - printf(" 0x%llx movfrh2gr.s %s, %s\n", insstrs, rd, fj); + 
printf("movfrh2gr.s %s, %s\n", rd, fj); return; case LA_2R_MOVGR2FCSR: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_2R_MOVGR2FCSR"); return; case LA_2R_MOVFCSR2GR: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_2R_MOVFCSR2GR"); return; case LA_2R_MOVFR2CF: { const char* cd = CFregName[code & 0x7]; - printf(" 0x%llx movfr2cf %s, %s\n", insstrs, cd, fj); + printf("movfr2cf %s, %s\n", cd, fj); return; } case LA_2R_MOVCF2FR: { const char* cj = CFregName[(code >> 5) & 0x7]; - printf(" 0x%llx movcf2fr %s, %s\n", insstrs, fd, cj); + printf("movcf2fr %s, %s\n", fd, cj); return; } case LA_2R_MOVGR2CF: { const char* cd = CFregName[code & 0x7]; - printf(" 0x%llx movgr2cf %s, %s\n", insstrs, cd, rj); + printf("movgr2cf %s, %s\n", cd, rj); return; } case LA_2R_MOVCF2GR: { const char* cj = CFregName[(code >> 5) & 0x7]; - printf(" 0x%llx movcf2gr %s, %s\n", insstrs, rd, cj); + printf("movcf2gr %s, %s\n", rd, cj); return; } case LA_2R_FCVT_S_D: - printf(" 0x%llx fcvt.s.d %s, %s\n", insstrs, fd, fj); + printf("fcvt.s.d %s, %s\n", fd, fj); return; case LA_2R_FCVT_D_S: - printf(" 0x%llx fcvt.d.s %s, %s\n", insstrs, fd, fj); + printf("fcvt.d.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_W_S: - printf(" 0x%llx ftintrm.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrm.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_W_D: - printf(" 0x%llx ftintrm.w.d %s, %s\n", insstrs, fd, fj); + printf("ftintrm.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_L_S: - printf(" 0x%llx ftintrm.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrm.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRM_L_D: - printf(" 0x%llx ftintrm.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrm.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_W_S: - printf(" 0x%llx ftintrp.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrp.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_W_D: - printf(" 0x%llx ftintrp.w.d %s, %s\n", insstrs, fd, fj); + 
printf("ftintrp.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_L_S: - printf(" 0x%llx ftintrp.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrp.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRP_L_D: - printf(" 0x%llx ftintrp.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrp.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_W_S: - printf(" 0x%llx ftintrz.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrz.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_W_D: - printf(" 0x%llx ftintrz.w.d %s, %s\n", insstrs, fd, fj); + printf("ftintrz.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_L_S: - printf(" 0x%llx ftintrz.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrz.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRZ_L_D: - printf(" 0x%llx ftintrz.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrz.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_W_S: - printf(" 0x%llx ftintrne.w.s %s, %s\n", insstrs, fd, fj); + printf("ftintrne.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_W_D: - printf(" 0x%llx ftintrne.w.d %s, %s\n", insstrs, fd, fj); + printf("ftintrne.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_L_S: - printf(" 0x%llx ftintrne.l.s %s, %s\n", insstrs, fd, fj); + printf("ftintrne.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINTRNE_L_D: - printf(" 0x%llx ftintrne.l.d %s, %s\n", insstrs, fd, fj); + printf("ftintrne.l.d %s, %s\n", fd, fj); return; case LA_2R_FTINT_W_S: - printf(" 0x%llx ftint.w.s %s, %s\n", insstrs, fd, fj); + printf("ftint.w.s %s, %s\n", fd, fj); return; case LA_2R_FTINT_W_D: - printf(" 0x%llx ftint.w.d %s, %s\n", insstrs, fd, fj); + printf("ftint.w.d %s, %s\n", fd, fj); return; case LA_2R_FTINT_L_S: - printf(" 0x%llx ftint.l.s %s, %s\n", insstrs, fd, fj); + printf("ftint.l.s %s, %s\n", fd, fj); return; case LA_2R_FTINT_L_D: - printf(" 0x%llx ftint.l.d %s, %s\n", insstrs, fd, fj); + printf("ftint.l.d %s, %s\n", fd, fj); return; case LA_2R_FFINT_S_W: - printf(" 0x%llx ffint.s.w %s, %s\n", insstrs, fd, fj); + printf("ffint.s.w 
%s, %s\n", fd, fj); return; case LA_2R_FFINT_S_L: - printf(" 0x%llx ffint.s.l %s, %s\n", insstrs, fd, fj); + printf("ffint.s.l %s, %s\n", fd, fj); return; case LA_2R_FFINT_D_W: - printf(" 0x%llx ffint.d.w %s, %s\n", insstrs, fd, fj); + printf("ffint.d.w %s, %s\n", fd, fj); return; case LA_2R_FFINT_D_L: - printf(" 0x%llx ffint.d.l %s, %s\n", insstrs, fd, fj); + printf("ffint.d.l %s, %s\n", fd, fj); return; case LA_2R_FRINT_S: - printf(" 0x%llx frint.s %s, %s\n", insstrs, fd, fj); + printf("frint.s %s, %s\n", fd, fj); return; case LA_2R_FRINT_D: - printf(" 0x%llx frint.d %s, %s\n", insstrs, fd, fj); + printf("frint.d %s, %s\n", fd, fj); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } return; @@ -5023,7 +5024,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx slti %s, %s, %d\n", insstrs, rd, rj, si12); + printf("slti %s, %s, %d\n", rd, rj, si12); return; } case LA_2RI12_SLTUI: // 0x9 @@ -5032,7 +5033,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx sltui %s, %s, %d\n", insstrs, rd, rj, si12); + printf("sltui %s, %s, %d\n", rd, rj, si12); return; } case LA_2RI12_ADDI_W: // 0xa @@ -5041,7 +5042,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx addi.w %s, %s, %d\n", insstrs, rd, rj, si12); + printf("addi.w %s, %s, %d\n", rd, rj, si12); return; } case LA_2RI12_ADDI_D: // 0xb @@ -5050,7 +5051,7 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rj = RegNames[(code >> 5) & 0x1f]; short si12 = ((code >> 10) & 0xfff) << 4; si12 >>= 4; - printf(" 0x%llx addi.d %s, %s, %ld\n", insstrs, rd, rj, si12); + printf("addi.d %s, %s, %ld\n", rd, rj, si12); return; } case LA_2RI12_LU52I_D: // 0xc @@ -5058,15 +5059,22 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int si12 = (code >> 10) & 0xfff; - printf(" 0x%llx lu52i.d %s, %s, 0x%x\n", insstrs, rd, rj, si12); + printf("lu52i.d %s, %s, 0x%x\n", rd, rj, si12); return; } case LA_2RI12_ANDI: // 0xd { - const char* rd = RegNames[code & 0x1f]; - const char* rj = RegNames[(code >> 5) & 0x1f]; - unsigned int ui12 = ((code >> 10) & 0xfff); - printf(" 0x%llx andi %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + if (code == 0x03400000) + { + printf("nop\n"); + } + else + { + const char* rd = RegNames[code & 0x1f]; + const char* rj = RegNames[(code >> 5) & 0x1f]; + unsigned int ui12 = ((code >> 10) & 0xfff); + printf("andi %s, %s, 0x%x\n", rd, rj, ui12); + } return; } case LA_2RI12_ORI: // 0xe @@ -5074,7 +5082,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); - printf(" 0x%llx ori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + printf("ori %s, %s, 0x%x\n", rd, rj, ui12); return; } case LA_2RI12_XORI: // 0xf @@ -5082,12 +5090,12 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; unsigned int ui12 = ((code >> 10) & 0xfff); - printf(" 0x%llx xori %s, %s, 0x%x\n", insstrs, rd, rj, ui12); + printf("xori %s, %s, 0x%x\n", rd, rj, ui12); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + 
printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -5107,7 +5115,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FMADD_D: @@ -5116,7 +5124,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FMSUB_S: @@ -5125,7 +5133,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FMSUB_D: @@ -5134,7 +5142,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMADD_S: @@ -5143,7 +5151,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 
0x%llx fnmadd.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmadd.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMADD_D: @@ -5152,7 +5160,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fnmadd.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmadd.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMSUB_S: @@ -5161,7 +5169,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fnmsub.s %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmsub.s %s, %s, %s, %s\n", fd, fj, fk, fa); return; } case LA_4R_FNMSUB_D: @@ -5170,11 +5178,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* fa = RegNames[((code >> 15) & 0x1f) + 32]; - printf(" 0x%llx fnmsub.d %s, %s, %s, %s\n", insstrs, fd, fj, fk, fa); + printf("fnmsub.d %s, %s, %s, %s\n", fd, fj, fk, fa); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -5194,73 +5202,73 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (cond) { case 0x0: - printf(" 0x%llx fcmp.caf.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.caf.s %s, %s, %s\n", cd, fj, fk); return; case 0x1: - printf(" 0x%llx fcmp.saf.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.saf.s %s, %s, %s\n", cd, fj, fk); return; case 0x2: - printf(" 0x%llx fcmp.clt.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.clt.s %s, 
%s, %s\n", cd, fj, fk); return; case 0x3: - printf(" 0x%llx fcmp.slt.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.slt.s %s, %s, %s\n", cd, fj, fk); return; case 0x4: - printf(" 0x%llx fcmp.ceq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.ceq.s %s, %s, %s\n", cd, fj, fk); return; case 0x5: - printf(" 0x%llx fcmp.seq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.seq.s %s, %s, %s\n", cd, fj, fk); return; case 0x6: - printf(" 0x%llx fcmp.cle.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cle.s %s, %s, %s\n", cd, fj, fk); return; case 0x7: - printf(" 0x%llx fcmp.sle.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sle.s %s, %s, %s\n", cd, fj, fk); return; case 0x8: - printf(" 0x%llx fcmp.cun.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cun.s %s, %s, %s\n", cd, fj, fk); return; case 0x9: - printf(" 0x%llx fcmp.sun.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sun.s %s, %s, %s\n", cd, fj, fk); return; case 0xA: - printf(" 0x%llx fcmp.cult.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cult.s %s, %s, %s\n", cd, fj, fk); return; case 0xB: - printf(" 0x%llx fcmp.sult.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sult.s %s, %s, %s\n", cd, fj, fk); return; case 0xC: - printf(" 0x%llx fcmp.cueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cueq.s %s, %s, %s\n", cd, fj, fk); return; case 0xD: - printf(" 0x%llx fcmp.sueq.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sueq.s %s, %s, %s\n", cd, fj, fk); return; case 0xE: - printf(" 0x%llx fcmp.cule.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cule.s %s, %s, %s\n", cd, fj, fk); return; case 0xF: - printf(" 0x%llx fcmp.sule.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sule.s %s, %s, %s\n", cd, fj, fk); return; case 0x10: - printf(" 0x%llx fcmp.cne.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cne.s %s, %s, %s\n", cd, fj, fk); return; case 0x11: - printf(" 0x%llx fcmp.sne.s %s, %s, %s\n", insstrs, cd, fj, fk); + 
printf("fcmp.sne.s %s, %s, %s\n", cd, fj, fk); return; case 0x14: - printf(" 0x%llx fcmp.cor.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cor.s %s, %s, %s\n", cd, fj, fk); return; case 0x15: - printf(" 0x%llx fcmp.sor.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sor.s %s, %s, %s\n", cd, fj, fk); return; case 0x18: - printf(" 0x%llx fcmp.cune.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cune.s %s, %s, %s\n", cd, fj, fk); return; case 0x19: - printf(" 0x%llx fcmp.sune.s %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sune.s %s, %s, %s\n", cd, fj, fk); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -5274,73 +5282,73 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) switch (cond) { case 0x0: - printf(" 0x%llx fcmp.caf.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.caf.d %s, %s, %s\n", cd, fj, fk); return; case 0x1: - printf(" 0x%llx fcmp.saf.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.saf.d %s, %s, %s\n", cd, fj, fk); return; case 0x2: - printf(" 0x%llx fcmp.clt.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.clt.d %s, %s, %s\n", cd, fj, fk); return; case 0x3: - printf(" 0x%llx fcmp.slt.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.slt.d %s, %s, %s\n", cd, fj, fk); return; case 0x4: - printf(" 0x%llx fcmp.ceq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.ceq.d %s, %s, %s\n", cd, fj, fk); return; case 0x5: - printf(" 0x%llx fcmp.seq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.seq.d %s, %s, %s\n", cd, fj, fk); return; case 0x6: - printf(" 0x%llx fcmp.cle.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cle.d %s, %s, %s\n", cd, fj, fk); return; case 0x7: - printf(" 0x%llx fcmp.sle.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sle.d %s, %s, %s\n", cd, fj, fk); return; case 0x8: - printf(" 0x%llx fcmp.cun.d %s, %s, %s\n", insstrs, cd, fj, fk); + 
printf("fcmp.cun.d %s, %s, %s\n", cd, fj, fk); return; case 0x9: - printf(" 0x%llx fcmp.sun.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sun.d %s, %s, %s\n", cd, fj, fk); return; case 0xA: - printf(" 0x%llx fcmp.cult.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cult.d %s, %s, %s\n", cd, fj, fk); return; case 0xB: - printf(" 0x%llx fcmp.sult.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sult.d %s, %s, %s\n", cd, fj, fk); return; case 0xC: - printf(" 0x%llx fcmp.cueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cueq.d %s, %s, %s\n", cd, fj, fk); return; case 0xD: - printf(" 0x%llx fcmp.sueq.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sueq.d %s, %s, %s\n", cd, fj, fk); return; case 0xE: - printf(" 0x%llx fcmp.cule.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cule.d %s, %s, %s\n", cd, fj, fk); return; case 0xF: - printf(" 0x%llx fcmp.sule.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sule.d %s, %s, %s\n", cd, fj, fk); return; case 0x10: - printf(" 0x%llx fcmp.cne.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cne.d %s, %s, %s\n", cd, fj, fk); return; case 0x11: - printf(" 0x%llx fcmp.sne.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sne.d %s, %s, %s\n", cd, fj, fk); return; case 0x14: - printf(" 0x%llx fcmp.cor.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cor.d %s, %s, %s\n", cd, fj, fk); return; case 0x15: - printf(" 0x%llx fcmp.sor.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sor.d %s, %s, %s\n", cd, fj, fk); return; case 0x18: - printf(" 0x%llx fcmp.cune.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.cune.d %s, %s, %s\n", cd, fj, fk); return; case 0x19: - printf(" 0x%llx fcmp.sune.d %s, %s, %s\n", insstrs, cd, fj, fk); + printf("fcmp.sune.d %s, %s, %s\n", cd, fj, fk); return; default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -5350,11 +5358,11 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fj = RegNames[((code >> 5) & 0x1f) + 32]; const char* fk = RegNames[((code >> 10) & 0x1f) + 32]; const char* ca = CFregName[(code >> 15) & 0x7]; - printf(" 0x%llx fsel %s, %s, %s, %s\n", insstrs, fd, fj, fk, ca); + printf("fsel %s, %s, %s, %s\n", fd, fj, fk, ca); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } @@ -5369,7 +5377,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_H: @@ -5377,7 +5385,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_W: @@ -5385,7 +5393,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_D: @@ -5393,7 +5401,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_B: @@ -5401,7 +5409,7 @@ void 
emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_H: @@ -5409,7 +5417,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_W: @@ -5417,7 +5425,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STX_D: @@ -5425,7 +5433,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stx.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stx.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_BU: @@ -5433,7 +5441,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.bu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.bu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_HU: @@ -5441,7 +5449,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = 
RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.hu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.hu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDX_WU: @@ -5449,18 +5457,18 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldx.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldx.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_PRELDX: - assert(!"unimplemented on loongarch yet!"); + NYI_LOONGARCH64("unused instr LA_3R_PRELDX"); return; case LA_3R_FLDX_S: { const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldx.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldx.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDX_D: @@ -5468,7 +5476,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldx.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldx.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTX_S: @@ -5476,7 +5484,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstx.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstx.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTX_D: @@ -5484,7 +5492,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstx.d %s, %s, %s\n", insstrs, fd, rj, rk); + 
printf("fstx.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_AMSWAP_W: @@ -5492,7 +5500,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMSWAP_D: @@ -5500,7 +5508,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_W: @@ -5508,7 +5516,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_D: @@ -5516,7 +5524,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_W: @@ -5524,7 +5532,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_D: @@ -5532,7 +5540,7 @@ void emitter::emitDisInsName(code_t code, const 
BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_W: @@ -5540,7 +5548,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_D: @@ -5548,7 +5556,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_W: @@ -5556,7 +5564,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_D: @@ -5564,7 +5572,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_W: @@ -5572,7 +5580,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 
0x1f]; - printf(" 0x%llx ammax.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_D: @@ -5580,7 +5588,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_W: @@ -5588,7 +5596,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_D: @@ -5596,7 +5604,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_WU: @@ -5604,7 +5612,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DU: @@ -5612,7 +5620,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax.du %s, %s, %s\n", rd, rj, rk); return; } case 
LA_3R_AMMIN_WU: @@ -5620,7 +5628,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DU: @@ -5628,7 +5636,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin.du %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMSWAP_DB_W: @@ -5636,7 +5644,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMSWAP_DB_D: @@ -5644,7 +5652,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amswap_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amswap_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_DB_W: @@ -5652,7 +5660,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMADD_DB_D: @@ -5660,7 +5668,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* 
id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amadd_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amadd_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_DB_W: @@ -5668,7 +5676,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMAND_DB_D: @@ -5676,7 +5684,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amand_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amand_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_DB_W: @@ -5684,7 +5692,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMOR_DB_D: @@ -5692,7 +5700,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amor_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_DB_W: @@ -5700,7 +5708,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = 
RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMXOR_DB_D: @@ -5708,7 +5716,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx amxor_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("amxor_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_W: @@ -5716,7 +5724,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_D: @@ -5724,7 +5732,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_W: @@ -5732,7 +5740,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin_db.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_D: @@ -5740,7 +5748,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.d %s, %s, %s\n", insstrs, rd, rj, rk); + 
printf("ammin_db.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_WU: @@ -5748,7 +5756,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMAX_DB_DU: @@ -5756,7 +5764,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammax_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammax_db.du %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_WU: @@ -5764,7 +5772,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.wu %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin_db.wu %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_AMMIN_DB_DU: @@ -5772,19 +5780,19 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ammin_db.du %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ammin_db.du %s, %s, %s\n", rd, rj, rk); return; } case LA_OP_DBAR: { unsigned int hint = code & 0x7fff; - printf(" 0x%llx dbar 0x%x\n", insstrs, hint); + printf("dbar 0x%x\n", hint); return; } case LA_OP_IBAR: { unsigned int hint = code & 0x7fff; - printf(" 0x%llx ibar 0x%x\n", insstrs, hint); + printf("ibar 0x%x\n", hint); return; } case LA_3R_FLDGT_S: @@ -5792,7 +5800,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const 
char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldgt.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDGT_D: @@ -5800,7 +5808,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldgt.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDLE_S: @@ -5808,7 +5816,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldle.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldle.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FLDLE_D: @@ -5816,7 +5824,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fldle.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fldle.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTGT_S: @@ -5824,7 +5832,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstgt.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstgt.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTGT_D: @@ -5832,7 +5840,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = 
RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstgt.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstgt.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTLE_S: @@ -5840,7 +5848,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstle.s %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstle.s %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_FSTLE_D: @@ -5848,7 +5856,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* fd = RegNames[(code & 0x1f) + 32]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx fstle.d %s, %s, %s\n", insstrs, fd, rj, rk); + printf("fstle.d %s, %s, %s\n", fd, rj, rk); return; } case LA_3R_LDGT_B: @@ -5856,7 +5864,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDGT_H: @@ -5864,7 +5872,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDGT_W: @@ -5872,7 +5880,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.w %s, %s, %s\n", rd, rj, rk); 
return; } case LA_3R_LDGT_D: @@ -5880,7 +5888,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldgt.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_B: @@ -5888,7 +5896,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_H: @@ -5896,7 +5904,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_W: @@ -5904,7 +5912,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_LDLE_D: @@ -5912,7 +5920,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx ldle.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("ldle.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_B: @@ -5920,7 +5928,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; 
const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_H: @@ -5928,7 +5936,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.h %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_W: @@ -5936,7 +5944,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STGT_D: @@ -5944,7 +5952,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stgt.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stgt.d %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_B: @@ -5952,7 +5960,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.b %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stle.b %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_H: @@ -5960,7 +5968,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.h %s, %s, %s\n", insstrs, rd, rj, rk); + 
printf("stle.h %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_W: @@ -5968,7 +5976,7 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.w %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stle.w %s, %s, %s\n", rd, rj, rk); return; } case LA_3R_STLE_D: @@ -5976,11 +5984,11 @@ void emitter::emitDisInsName(code_t code, const BYTE* dst, instrDesc* id) const char* rd = RegNames[code & 0x1f]; const char* rj = RegNames[(code >> 5) & 0x1f]; const char* rk = RegNames[(code >> 10) & 0x1f]; - printf(" 0x%llx stle.d %s, %s, %s\n", insstrs, rd, rj, rk); + printf("stle.d %s, %s, %s\n", rd, rj, rk); return; } default: - printf("LOONGARCH illegal instruction: 0x%08x\n", code); + printf("LOONGARCH illegal instruction: %08X\n", code); return; } } @@ -6009,8 +6017,11 @@ void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) void emitter::emitDispIns( instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) -{ // not used on loongarch64. - printf("------------not implements emitDispIns() for loongarch64!!!\n"); +{ + // LA implements this similarly via `emitter::emitDisInsName`. + // For LA, the full `emitDispIns` would be overly complicated. + // `emitter::emitDisInsName` is focused on the information most important for debugging.
+ NYI_LOONGARCH64("LA not used the emitter::emitDispIns"); } /***************************************************************************** @@ -6278,7 +6289,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) { - NYI_LOONGARCH64("emitInsBinary-----unimplemented on LOONGARCH64 yet----"); + NYI_LOONGARCH64("emitInsBinary-----unused"); return REG_R0; } diff --git a/src/coreclr/jit/emitloongarch64.h b/src/coreclr/jit/emitloongarch64.h index 9b79d544a9a3a..d7e7cc5450acb 100644 --- a/src/coreclr/jit/emitloongarch64.h +++ b/src/coreclr/jit/emitloongarch64.h @@ -28,8 +28,7 @@ struct CnsVal const char* emitFPregName(unsigned reg, bool varName = true); const char* emitVectorRegName(regNumber reg); -// NOTE: At least 32bytes within dst. -void emitDisInsName(code_t code, const BYTE* dst, instrDesc* id); +void emitDisInsName(code_t code, const BYTE* addr, instrDesc* id); #endif // DEBUG void emitIns_J_cond_la(instruction ins, BasicBlock* dst, regNumber reg1 = REG_R0, regNumber reg2 = REG_R0); From 7280c46b4b707d13145810578785243da15f53e6 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 23 Mar 2022 15:24:55 +0800 Subject: [PATCH 38/46] [LoongArch64] remove the optimization for type-cast which depends on LoongArch64. 
--- src/coreclr/jit/emit.cpp | 4 +- src/coreclr/jit/gentree.cpp | 9 -- src/coreclr/jit/importer.cpp | 226 +++------------------------------- src/coreclr/jit/morph.cpp | 52 +------- src/coreclr/jit/optimizer.cpp | 17 --- 5 files changed, 21 insertions(+), 287 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c8f0b0701a70a..79d63b659c633 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -744,6 +744,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) ig->igFlags |= IGF_BYREF_REGS; // We'll allocate extra space (DWORD aligned) to record the GC regs + gs += sizeof(int); } @@ -756,6 +757,7 @@ insGroup* emitter::emitSavIG(bool emitAdd) if (ig->igFlags & IGF_BYREF_REGS) { // Record the byref regs in front the of the instructions + *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs; } @@ -4332,7 +4334,6 @@ void emitter::emitJumpDistBind() lstIG->igOffs - adjIG); } #endif // DEBUG - lstIG->igOffs -= adjIG; assert(IsCodeAligned(lstIG->igOffs)); } while (lstIG != jmpIG); @@ -4846,7 +4847,6 @@ void emitter::emitJumpDistBind() goto AGAIN; } } - #ifdef DEBUG if (EMIT_INSTLIST_VERBOSE) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c678219c21db5..539d58688c969 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13806,16 +13806,7 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) case TYP_INT: -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- assert(tree->TypeIs(TYP_INT, TYP_LONG) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); -#else assert(tree->TypeIs(TYP_INT) || varTypeIsGC(tree) || tree->OperIs(GT_MKREFANY)); -#endif // No GC pointer types should be folded here... assert(!varTypeIsGC(op1->TypeGet()) && !varTypeIsGC(op2->TypeGet())); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b2e9fced636f5..7a27813cdff6b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -11430,45 +11430,13 @@ var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTr if (genActualType(op1->TypeGet()) != TYP_I_IMPL) { -// insert an explicit upcast -#ifdef TARGET_LOONGARCH64 - if (op1->TypeGet() == TYP_INT && op1->gtOper == GT_CNS_INT) - { - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. - op1->AsIntCon()->gtIconVal = - fUnsigned ? (uint32_t)op1->AsIntCon()->gtIconVal : op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_LONG; - } - else - *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#else + // insert an explicit upcast op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#endif } else if (genActualType(op2->TypeGet()) != TYP_I_IMPL) { -// insert an explicit upcast -#ifdef TARGET_LOONGARCH64 - if (op2->TypeGet() == TYP_INT && op2->gtOper == GT_CNS_INT) - { - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- op2->AsIntCon()->gtIconVal = - fUnsigned ? (uint32_t)op2->AsIntCon()->gtIconVal : op2->AsIntCon()->gtIconVal; - op2->gtType = TYP_LONG; - } - else - *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#else + // insert an explicit upcast op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, fUnsigned, fUnsigned ? TYP_U_IMPL : TYP_I_IMPL); -#endif } type = TYP_I_IMPL; @@ -12670,17 +12638,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif // FEATURE_SIMD -#ifdef TARGET_LOONGARCH64 - if (prevOpcode == CEE_LDC_I4_0 || prevOpcode == CEE_LDNULL) - { - op1->gtType = lclTyp; - op1->gtFlags |= GTF_CONTAINED; - } - else - op1 = impImplicitIorI4Cast(op1, lclTyp); -#else - op1 = impImplicitIorI4Cast(op1, lclTyp); -#endif + op1 = impImplicitIorI4Cast(op1, lclTyp); #ifdef TARGET_64BIT // Downcast the TYP_I_IMPL into a 32-bit Int for x86 JIT compatiblity @@ -13560,17 +13518,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; // operand to be shifted impBashVarAddrsToI(op1, op2); -#ifdef TARGET_LOONGARCH64 - if (op2->gtOper == GT_CNS_INT && op2->AsIntCon()->gtIconVal > 31) - { - type = TYP_LONG; - } - else - type = genActualType(op1->TypeGet()); -#else type = genActualType(op1->TypeGet()); -#endif - op1 = gtNewOperNode(oper, type, op1, op2); + op1 = gtNewOperNode(oper, type, op1, op2); impPushOnStack(op1, tiRetVal); break; @@ -13777,29 +13726,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // See also identical code in impGetByRefResultType and STSFLD import. if (varTypeIsI(op1) && (genActualType(op2) == TYP_INT)) { -#ifdef TARGET_LOONGARCH64 - if (op2->gtOper == GT_CNS_INT) - { - op2->AsIntCon()->gtIconVal = - uns ? 
(uint32_t)op2->AsIntCon()->gtIconVal : (int32_t)op2->AsIntCon()->gtIconVal; - op2->gtType = TYP_LONG; - } - else -#endif - op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); + op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, TYP_I_IMPL); } else if (varTypeIsI(op2) && (genActualType(op1) == TYP_INT)) { -#ifdef TARGET_LOONGARCH64 - if (op1->gtOper == GT_CNS_INT) - { - op1->AsIntCon()->gtIconVal = - uns ? (uint32_t)op1->AsIntCon()->gtIconVal : (int32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_LONG; - } - else -#endif - op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); + op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -13886,18 +13817,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = impPopStack().val; #ifdef TARGET_64BIT -#ifdef TARGET_LOONGARCH64 - if ((op2->OperGet() == GT_CNS_INT) /* && (op2->AsIntCon()->IconValue() == 0)*/) - { - op2->gtType = op1->TypeGet(); - } -/*if (op1->OperGet() == GT_CNS_INT) -{ - //assert(op1->gtType == op2->TypeGet()); - //op2->gtType = op1->TypeGet(); - op1->gtFlags |= GTF_CONTAINED; -}*/ -#else if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT)) { op2 = gtNewCastNode(TYP_I_IMPL, op2, uns, uns ? TYP_U_IMPL : TYP_I_IMPL); @@ -13906,20 +13825,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) { op1 = gtNewCastNode(TYP_I_IMPL, op1, uns, uns ? 
TYP_U_IMPL : TYP_I_IMPL); } -#endif #endif // TARGET_64BIT -#ifdef TARGET_LOONGARCH64 - assertImp((genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op1->TypeGet()) == TYP_INT) || - (genActualType(op2->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_INT) || - (genActualType(op1->TypeGet()) == genActualType(op2->TypeGet())) || - (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || - (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); -#else assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) || (varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet())) || (varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))); -#endif if (opts.OptimizationEnabled() && (block->bbJumpDest == block->bbNext)) { @@ -14160,18 +14070,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) } op1 = impPopStack().val; -#ifdef TARGET_LOONGARCH64 - if (!callNode && prevOpcode == CEE_LDC_I4_0) - { - assert(op1->gtOper == GT_CNS_INT && op1->AsIntCon()->gtIconVal == 0); - if (varTypeIsFloating(lclTyp)) - op1->gtOper = GT_CNS_DBL; - op1->gtType = genActualType(lclTyp); - impPushOnStack(op1, tiRetVal); - // opcode = CEE_LDC_I4_0; - break; - } -#endif impBashVarAddrsToI(op1); @@ -14181,38 +14079,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) uns = false; } -// At this point uns, ovf, callNode are all set. 
- -#ifdef TARGET_LOONGARCH64 - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtOper == GT_CNS_INT) - { - switch (lclTyp) - { - case TYP_BYTE: - op1->AsIntCon()->gtIconVal = (int8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_UBYTE: - op1->AsIntCon()->gtIconVal = (uint8_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_USHORT: - op1->AsIntCon()->gtIconVal = (uint16_t)op1->AsIntCon()->gtIconVal; - break; - case TYP_SHORT: - op1->AsIntCon()->gtIconVal = (short)op1->AsIntCon()->gtIconVal; - break; - default: - assert(!"unexpected type"); - return; - } - - op1->gtType = TYP_INT; + // At this point uns, ovf, callNode are all set. - impPushOnStack(op1, tiRetVal); - break; - } - else -#endif - if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) + if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND) { op2 = op1->AsOp()->gtOp2; @@ -14276,32 +14145,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = gtNewCastNodeL(type, op1, uns, lclTyp); } else -#ifdef TARGET_LOONGARCH64 - if (type != TYP_LONG) - { - if (!ovfl && op1->gtOper == GT_CNS_INT && op1->TypeGet() == TYP_LONG) - { - assert(lclTyp == TYP_INT || lclTyp == TYP_UINT); - if (lclTyp == TYP_INT) - { - op1->AsIntCon()->gtIconVal = (int32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_INT; - } - else if (lclTyp == TYP_UINT) - { - op1->AsIntCon()->gtIconVal = (uint32_t)op1->AsIntCon()->gtIconVal; - op1->gtType = TYP_UINT; - } - else - op1 = gtNewCastNode(type, op1, uns, lclTyp); - } - else - { - op1 = gtNewCastNode(type, op1, uns, lclTyp); - } - } - else -#endif { op1 = gtNewCastNode(type, op1, uns, lclTyp); } @@ -14311,13 +14154,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1->gtFlags |= (GTF_OVERFLOW | GTF_EXCEPT); } -#ifndef TARGET_LOONGARCH64 if (op1->gtGetOp1()->OperIsConst() && opts.OptimizationEnabled()) { // Try and fold the introduced cast op1 = gtFoldExprConst(op1); } -#endif } impPushOnStack(op1, tiRetVal); @@ 
-15943,9 +15784,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) op2->gtType = TYP_I_IMPL; } else -#ifdef TARGET_LOONGARCH64 - if (genActualType(op2->TypeGet()) != TYP_INT) -#endif { bool isUnsigned = false; op2 = gtNewCastNode(TYP_I_IMPL, op2, isUnsigned, TYP_I_IMPL); @@ -17310,21 +17148,12 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode) impBashVarAddrsToI(op2); op2 = impImplicitIorI4Cast(op2, info.compRetType); op2 = impImplicitR4orR8Cast(op2, info.compRetType); -// Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. -#ifdef TARGET_LOONGARCH64 + // Note that we allow TYP_I_IMPL<->TYP_BYREF transformation, but only TYP_I_IMPL<-TYP_REF. assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || - (genTypeStSz(op2->TypeGet()) == genTypeStSz(info.compRetType)) || ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); -#else - assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) || - ((op2->TypeGet() == TYP_I_IMPL) && TypeIs(info.compRetType, TYP_BYREF)) || - (op2->TypeIs(TYP_BYREF, TYP_REF) && (info.compRetType == TYP_I_IMPL)) || - (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) || - (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType))); -#endif #ifdef DEBUG if (!isTailCall && opts.compGcChecks && (info.compRetType == TYP_REF)) @@ -18169,17 +17998,9 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_I_IMPL) { -// Spill clique has decided this should be "native int", but this block only pushes an "int". -// Insert a sign-extension to "native int" so we match the clique. 
-#ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT) - { - tree->gtType = TYP_I_IMPL; - tree->SetContained(); - } - else -#endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + // Spill clique has decided this should be "native int", but this block only pushes an "int". + // Insert a sign-extension to "native int" so we match the clique. + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } // Consider the case where one branch left a 'byref' on the stack and the other leaves @@ -18199,17 +18020,9 @@ void Compiler::impImportBlock(BasicBlock* block) } else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_BYREF) { -// Spill clique has decided this should be "byref", but this block only pushes an "int". -// Insert a sign-extension to "native int" so we match the clique size. -#ifdef TARGET_LOONGARCH64 - if (tree->gtOper == GT_CNS_INT) - { - tree->gtType = TYP_I_IMPL; - tree->SetContained(); - } - else -#endif - verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); + // Spill clique has decided this should be "byref", but this block only pushes an "int". + // Insert a sign-extension to "native int" so we match the clique size. + verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, false, TYP_I_IMPL); } #endif // TARGET_64BIT @@ -20934,13 +20747,8 @@ bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) return false; } #elif defined(TARGET_LOONGARCH64) - switch (intrinsicName) - { - // LOONGARCH64: will amend in the future - - default: - return false; - } + // TODO-LoongArch64: add some intrinsics. + return false; #else // TODO: This portion of logic is not implemented for other arch.
// The reason for returning true is that on all other arch the only intrinsic diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 9932390b5f48b..84a75fb85c879 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5235,7 +5235,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* GenTree* arg = fgMakeTmpArgNode(argEntry); // Change the expression to "(tmp=val),tmp" - arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); + arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg); #endif // FEATURE_FIXED_OUT_ARGS @@ -5414,17 +5414,6 @@ void Compiler::fgMoveOpsLeft(GenTree* tree) noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL); new_op1->gtType = TYP_I_IMPL; } -#ifdef TARGET_LOONGARCH64 - else if ((op1->TypeGet() == TYP_LONG) && (ad2->TypeGet() == TYP_INT)) - { - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. - new_op1->gtType = TYP_LONG; - } -#endif // If new_op1 is a new expression. Assign it a new unique value number. // vnStore is null before the ValueNumber phase has run @@ -5719,14 +5708,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) noway_assert(index2 != nullptr); } -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)lenOffs, compCurBB); -#else // Next introduce a GT_BOUNDS_CHECK node var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check. @@ -5746,7 +5727,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) { arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType); } -#endif GenTreeBoundsChk* arrBndsChk = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL); @@ -5764,13 +5744,7 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) GenTree* addr; #ifdef TARGET_64BIT -#ifndef TARGET_LOONGARCH64 // Widen 'index' on 64-bit targets - // But For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. if (index->TypeGet() != TYP_I_IMPL) { if (index->OperGet() == GT_CNS_INT) @@ -5782,7 +5756,6 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree) index = gtNewCastNode(TYP_I_IMPL, index, true, TYP_I_IMPL); } } -#endif #endif // TARGET_64BIT /* Scale the index value if necessary */ @@ -14710,17 +14683,6 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) // be in a fully-interruptible code region. if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet())) { -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
- if (op2->TypeGet() == TYP_LONG) - { - op1->gtType = TYP_LONG; - } -#endif tree->gtOp2 = ad2; op1->AsOp()->gtOp2 = op2; @@ -18479,18 +18441,8 @@ GenTree* Compiler::fgMorphImplicitByRefArgs(GenTree* tree, bool isAddr) // void Compiler::fgAddFieldSeqForZeroOffset(GenTree* addr, FieldSeqNode* fieldSeqZero) { -// We expect 'addr' to be an address at this point. -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. - assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_INT || - addr->TypeGet() == TYP_REF); -#else + // We expect 'addr' to be an address at this point. assert(addr->TypeGet() == TYP_BYREF || addr->TypeGet() == TYP_I_IMPL || addr->TypeGet() == TYP_REF); -#endif // Tunnel through any commas. const bool commaOnly = true; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 1ad459299dc76..fe8a092a191ce 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -5092,14 +5092,7 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu unsigned kind; noway_assert(tree); -#ifndef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. 
So `LONG != INT(but default is LONG)` noway_assert(genActualType(tree->gtType) == genActualType(srct)); -#endif /* Assume we're only handling integer types */ noway_assert(varTypeIsIntegral(srct)); @@ -5267,18 +5260,8 @@ bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, Valu switch (tree->gtOper) { case GT_AND: -#ifdef TARGET_LOONGARCH64 - // For LoongArch64's instructions operation of the 64bits and 32bits using the whole - // 64bits-width register which is unlike the AMD64 and ARM64. - // And the INT type instruction will be signed-extend by default. - // e.g. 'ld_w $r4, $5, 4' and `addi_w $r4,$r5,-1` the result of INT - // will be signed-extend by default. So `LONG != INT(but default is LONG)` - noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op1->gtType))); - noway_assert(genTypeSize(genActualType(tree->gtType)) >= genTypeSize(genActualType(op2->gtType))); -#else noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); -#endif GenTree* opToNarrow; opToNarrow = nullptr; From e1b5f9da81bae7cc2f9296ea5658f49ad1be8449 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 25 Mar 2022 10:24:15 +0800 Subject: [PATCH 39/46] [LoongArch64] amend the code for CR.
--- src/coreclr/jit/codegenlinear.cpp | 5 ++-- src/coreclr/jit/compiler.cpp | 45 ++++++------------------------- src/coreclr/jit/gentree.cpp | 33 +++++++++++++---------- src/coreclr/jit/instr.cpp | 20 ++------------ src/coreclr/jit/lower.cpp | 4 --- src/coreclr/jit/morph.cpp | 13 ++++----- src/coreclr/jit/valuenum.cpp | 7 ----- 7 files changed, 39 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 6b7c1f1fa2c45..e03e83d49c553 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2517,8 +2517,9 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast) } #ifdef TARGET_LOONGARCH64 - m_extendKind = castUnsigned ? ZERO_EXTEND_INT : SIGN_EXTEND_INT; - cast->gtFlags |= castUnsigned ? GTF_UNSIGNED : GTF_EMPTY; + // For LoongArch64's ISA which is same with the MIPS64 ISA, even the instructions of 32bits operation need + // the upper 32bits be sign-extended to 64 bits. + m_extendKind = SIGN_EXTEND_INT; #else m_extendKind = COPY; #endif diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1b6fc4255b1eb..5d6fff9b367a7 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -749,34 +749,14 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_UNKNOWN; } -#elif defined(TARGET_X86) || defined(TARGET_ARM) +#elif defined(TARGET_X86) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) // Otherwise we pass this struct by value on the stack // setup wbPassType and useType indicate that this is passed by value according to the X86/ARM32 ABI + // On LOONGARCH64 struct that is 1-16 bytes is passed by value in one/two register(s) howToPassStruct = SPK_ByValue; useType = TYP_STRUCT; -#elif defined(TARGET_LOONGARCH64) - // Structs that are pointer sized or smaller. 
- // assert(structSize > TARGET_POINTER_SIZE); - - // On LOONGARCH64 structs that are 1-16 bytes are passed by value in one/multiple register(s) - if (structSize <= (TARGET_POINTER_SIZE * 2)) - { - // setup wbPassType and useType indicate that this is passed by value in multiple registers - // (when all of the parameters registers are used, then the stack will be used) - howToPassStruct = SPK_ByValue; - useType = TYP_STRUCT; - } - else // a structSize that is 17-32 bytes in size - { - // Otherwise we pass this struct by reference to a copy - // setup wbPassType and useType indicate that this is passed using one register - // (by reference to a copy) - howToPassStruct = SPK_ByReference; - useType = TYP_UNKNOWN; - } - #else // TARGET_XXX noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)"); @@ -1084,21 +1064,9 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #elif defined(TARGET_LOONGARCH64) - // On LOONGARCH64 structs that are 1-16 bytes are returned by value in one/multiple register(s) - if (structSize <= (TARGET_POINTER_SIZE * 2)) - { - // setup wbPassType and useType indicate that this is return by value in multiple registers - howToReturnStruct = SPK_ByValue; - useType = TYP_STRUCT; - } - else // a structSize that is 17-32 bytes in size - { - // Otherwise we return this struct using a return buffer/byreference. 
- // setup wbPassType and useType indicate that this is returned using a return buffer register - // (reference to a return buffer) - howToReturnStruct = SPK_ByReference; - useType = TYP_UNKNOWN; - } + // On LOONGARCH64 struct that is 1-16 bytes is returned by value in one/two register(s) + howToReturnStruct = SPK_ByValue; + useType = TYP_STRUCT; #else // TARGET_XXX @@ -2276,8 +2244,11 @@ void Compiler::compSetProcessor() info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; + #elif defined(TARGET_LOONGARCH64) + info.genCPU = CPU_LOONGARCH64; + #endif // diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 539d58688c969..26049b26f4fe2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -3208,11 +3208,11 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ *pCostEx += idx->GetCostEx(); *pCostSz += idx->GetCostSz(); } - // TODO-LOONGARCH64: workround, should amend for LoongArch64. if (cns != 0) { - if (cns >= (4096 * genTypeSize(type))) + if (!emitter::isValidSimm12(cns)) { + // TODO-LoongArch64-CQ: tune for LoongArch64. *pCostEx += 1; *pCostSz += 4; } @@ -3632,11 +3632,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) goto COMMON_CNS; #elif defined(TARGET_LOONGARCH64) + // TODO-LoongArch64-CQ: tune the costs. case GT_CNS_STR: + costEx = IND_COST_EX + 2; + costSz = 4; + goto COMMON_CNS; + case GT_CNS_LNG: case GT_CNS_INT: - // TODO-LOONGARCH64: workround, should amend for LoongArch64. - costEx = 4; + costEx = 1; costSz = 4; goto COMMON_CNS; #else @@ -3701,7 +3705,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 4; } #elif defined(TARGET_LOONGARCH64) - // TODO-LOONGARCH64: workround, should amend for LoongArch64. + // TODO-LoongArch64-CQ: tune the costs. 
costEx = 2; costSz = 8; #else @@ -3878,14 +3882,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 6; } #elif defined(TARGET_LOONGARCH64) - // TODO-LOONGARCH64: workround, should amend for LoongArch64. + // TODO-LoongArch64-CQ: tune the costs. costEx = 1; - costSz = 2; - if (isflt || varTypeIsFloating(op1->TypeGet())) - { - costEx = 2; - costSz = 4; - } + costSz = 4; #else #error "Unknown TARGET" #endif @@ -22273,11 +22272,17 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx) const } else { - noway_assert(idx < 2); // Up to 2 return registers for two-float-field structs + noway_assert(idx == 1); // Up to 2 return registers for two-float-field structs + + // If the first return register is from the same register file, return the one next to it. if (varTypeIsIntegralOrI(regType)) + { resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_INTRET_1 : REG_INTRET; // A0 or A1 - else // if (!varTypeIsIntegralOrI(regType)) + } + else // varTypeUsesFloatReg(regType) + { resultReg = varTypeIsIntegralOrI(GetReturnRegType(0)) ? REG_FLOATRET : REG_FLOATRET_1; // F0 or F1 + } } #endif // TARGET_XXX diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 6406ceb3715da..2d797889323ce 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -906,19 +906,7 @@ void CodeGen::inst_RV_TT(instruction ins, regSet.verifyRegUsed(regTmp); return; } -#else // !TARGET_ARM -#ifdef TARGET_LOONGARCH64 - // For LoongArch64-ABI, the float arg might be passed by integer register, - // when there is no float register left but there is integer register(s) left. - if (emitter::isFloatReg(reg)) - { - assert((ins == INS_fld_d) || (ins == INS_fld_s)); - } - else if (emitter::isGeneralRegister(reg) && (ins != INS_lea)) - { - ins = size == EA_4BYTE ? 
INS_ld_w : INS_ld_d; - } -#endif +#else // !TARGET_ARM GetEmitter()->emitIns_R_S(ins, size, reg, varNum, offs); return; #endif // !TARGET_ARM @@ -1725,10 +1713,6 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* { ins = INS_ld_w; } - else if (TYP_UINT == srcType) - { - ins = INS_ld_wu; - } else { ins = INS_ld_d; // default ld_d. @@ -1956,7 +1940,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false ins = aligned ? INS_stx_b : INS_st_b; else if (varTypeIsShort(dstType)) ins = aligned ? INS_stx_h : INS_st_h; - else if ((TYP_INT == dstType) || (TYP_UINT == dstType)) + else if (TYP_INT == dstType) ins = aligned ? INS_stx_w : INS_st_w; else ins = aligned ? INS_stx_d : INS_st_d; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 79ca6fb190527..77deae14efc45 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5874,11 +5874,7 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node) GenTree* dividend = divMod->gtGetOp1(); GenTree* divisor = divMod->gtGetOp2(); -#if defined(TARGET_LOONGARCH64) - const var_types type = genActualType(divMod->TypeGet()); -#else const var_types type = divMod->TypeGet(); -#endif assert((type == TYP_INT) || (type == TYP_LONG)); #if defined(USE_HELPERS_FOR_INT_DIV) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 84a75fb85c879..2902c35b9c496 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3815,17 +3815,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) assert(!"Structs are not passed by reference on x64/ux"); #endif // UNIX_AMD64_ABI } -#if defined(DEBUG) && defined(TARGET_LOONGARCH64) - else if ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && (size == 2)) - { - DEBUG_ARG_SLOTS_ASSERT(size == argEntry->numRegs); - } -#endif else // This is passed by value. 
{ +#if defined(TARGET_LOONGARCH64) + // For LoongArch64 the struct {float a; float b;} can be passed by two float registers. + DEBUG_ARG_SLOTS_ASSERT((size == roundupSize / TARGET_POINTER_SIZE) || + ((structBaseType == TYP_STRUCT) && (originalSize == TARGET_POINTER_SIZE) && + (size == 2) && (size == argEntry->numRegs))); +#else // Check to see if we can transform this into load of a primitive type. // 'size' must be the number of pointer sized items DEBUG_ARG_SLOTS_ASSERT(size == roundupSize / TARGET_POINTER_SIZE); +#endif structSize = originalSize; unsigned passingSize = originalSize; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 3f2033446ed08..779813c72eabc 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -2845,14 +2845,7 @@ ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, Valu } else { -#ifdef TARGET_LOONGARCH64 - // For LoongArch64, the int32 will signed-extend default, - // e.g. `ld_w $r4, $r5, 4` loading a int32 from the addr `$r5+4`. - // So there is no need to signed-extend. - assert(typ == TYP_INT || typ == TYP_LONG); -#else assert(typ == TYP_INT); -#endif int resultVal = EvalOp(func, arg0Val, arg1Val); // Bin op on a handle results in a handle. ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN; From 81120de98afd286e277f3bee2c46d48abf8c35ab Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Wed, 30 Mar 2022 14:23:27 +0800 Subject: [PATCH 40/46] [LoongArch64] amend some code for CR. 
--- src/coreclr/jit/compiler.cpp | 20 +++++++++----------- src/coreclr/jit/morph.cpp | 23 +++++++---------------- src/coreclr/jit/scopeinfo.cpp | 6 +++++- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5d6fff9b367a7..4849d73ae1f7a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -902,16 +902,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, howToReturnStruct = SPK_ByReference; useType = TYP_UNKNOWN; } -#endif - if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) && - !isNativePrimitiveStructType(clsHnd)) - { - canReturnInRegister = false; - howToReturnStruct = SPK_ByReference; - useType = TYP_UNKNOWN; - } - -#ifdef TARGET_LOONGARCH64 +#elif defined(TARGET_LOONGARCH64) if (structSize <= (TARGET_POINTER_SIZE * 2)) { uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(clsHnd); @@ -927,7 +918,14 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, useType = TYP_STRUCT; } } -#endif // TARGET_LOONGARCH64 +#endif + if (TargetOS::IsWindows && !TargetArchitecture::IsArm32 && callConvIsInstanceMethodCallConv(callConv) && + !isNativePrimitiveStructType(clsHnd)) + { + canReturnInRegister = false; + howToReturnStruct = SPK_ByReference; + useType = TYP_UNKNOWN; + } // Check for cases where a small struct is returned in a register // via a primitive type.
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2902c35b9c496..7ccbea685939b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12632,24 +12632,11 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) break; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) case GT_DIV: - if (!varTypeIsFloating(tree->gtType)) - { - // Codegen for this instruction needs to be able to throw two exceptions: - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); - } - break; - case GT_UDIV: - // Codegen for this instruction needs to be able to throw one exception: - fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); - break; -#endif - #ifdef TARGET_LOONGARCH64 - case GT_DIV: case GT_MOD: +#endif if (!varTypeIsFloating(tree->gtType)) { // Codegen for this instruction needs to be able to throw two exceptions: @@ -12658,11 +12645,15 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) } break; case GT_UDIV: +#ifdef TARGET_LOONGARCH64 case GT_UMOD: +#endif // Codegen for this instruction needs to be able to throw one exception: fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO); break; -#endif + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + case GT_ADD: CM_OVF_OP: diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index 2e315d2faa84b..1c0d6679cb6a7 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -1609,17 +1609,21 @@ void CodeGen::psiBegProlog() noway_assert(EA_SIZE_IN_BYTES(lclVarDsc->lvSize()) <= 16); if (emitter::isFloatReg(lclVarDsc->GetArgReg())) { - // regType = lclVarDsc->lvIs4Field1 ? 
TYP_FLOAT : TYP_DOUBLE; regType = TYP_DOUBLE; } else + { regType = lclVarDsc->GetLayout()->GetGCPtrType(0); + } } else { regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); if (emitter::isGeneralRegisterOrR0(lclVarDsc->GetArgReg()) && isFloatRegType(regType)) + { + // For LoongArch64's ABI, the float args may be passed by integer register. regType = TYP_LONG; + } } #else var_types regType = compiler->mangleVarArgsType(lclVarDsc->TypeGet()); From f369343267462878a7d6053e782ba8141ea53521 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Thu, 31 Mar 2022 11:16:54 +0800 Subject: [PATCH 41/46] [LoongArch64] amend some code for CR round2. --- src/coreclr/jit/codegenloongarch64.cpp | 18 ++-- src/coreclr/jit/emitloongarch64.cpp | 46 ++-------- src/coreclr/jit/lclvars.cpp | 122 ++++++++++++------------- src/coreclr/jit/lower.cpp | 7 +- src/coreclr/jit/lowerloongarch64.cpp | 63 ++++++------- src/coreclr/jit/lsraloongarch64.cpp | 1 + 6 files changed, 108 insertions(+), 149 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index b6a7442c19dce..e5b4be91ba713 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1876,7 +1876,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) genProduceReg(treeNode); } -// Generate code for ADD, SUB, MUL, AND, OR and XOR +// Generate code for ADD, SUB, MUL, AND, AND_NOT, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. 
void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { @@ -1884,7 +1884,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { regNumber targetReg = treeNode->GetRegNum(); emitter* emit = GetEmitter(); - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_AND || oper == GT_OR || oper == GT_XOR); + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2(); @@ -2548,19 +2548,18 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - assert(!"unimpleement on LOONGARCH64 yet"); + NYI_LOONGARCH64("genCodeForBswap unimplemented yet"); } //------------------------------------------------------------------------ -// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. We don't see MOD: -// (1) integer MOD is morphed into a sequence of sub, mul, div in fgMorph; -// (2) float/double MOD is morphed into a helper call by front-end. +// genCodeForDivMod: Produce code for a GT_DIV/GT_UDIV node. +// (1) float/double MOD is morphed into a helper call by front-end. // // Arguments: // tree - the node // void CodeGen::genCodeForDivMod(GenTreeOp* tree) -{ // can amend further. +{ assert(tree->OperIs(GT_MOD, GT_UMOD, GT_DIV, GT_UDIV)); var_types targetType = tree->TypeGet(); @@ -3355,6 +3354,11 @@ instruction CodeGen::genGetInsForOper(GenTree* treeNode) } break; + case GT_AND_NOT: + assert(!isImmed(treeNode)); + ins = INS_andn; + break; + case GT_OR: isImm = isImmed(treeNode); if (isImm) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index a3f2a226f8923..0905579406548 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -6395,13 +6395,16 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } if (intConst != nullptr) - { // should re-design this case!!! ---2020.04.11. 
+ { ssize_t imm = intConst->IconValue(); if (ins == INS_andi || ins == INS_ori || ins == INS_xori) - // assert((0 <= imm) && (imm <= 0xfff)); - assert((-2048 <= imm) && (imm <= 0xfff)); + { + assert(isValidUimm12(imm)); + } else - assert((-2049 < imm) && (imm < 2048)); + { + assert(isValidSimm12(imm)); + } if (ins == INS_sub_d) { @@ -6420,35 +6423,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(ins == INS_addi_d || ins == INS_addi_w || ins == INS_andi || ins == INS_ori || ins == INS_xori); - if ((imm < 0) && (ins == INS_andi || ins == INS_ori || ins == INS_xori)) - { - assert(attr == EA_8BYTE || attr == EA_4BYTE); - assert(nonIntReg->GetRegNum() != REG_R21); - - emitIns_R_R_I(INS_addi_d, EA_8BYTE, REG_R21, REG_R0, imm); - - if (ins == INS_andi) - { - ins = INS_and; - } - else if (ins == INS_ori) - { - ins = INS_or; - } - else if (ins == INS_xori) - { - ins = INS_xor; - } - else - { - unreached(); - } - - emitIns_R_R_R(ins, attr, dst->GetRegNum(), REG_R21, nonIntReg->GetRegNum()); - - goto L_Done; - } - if (needCheckOv) { emitIns_R_R_R(INS_or, attr, REG_R21, nonIntReg->GetRegNum(), REG_R0); @@ -6567,11 +6541,11 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } } } - else if (dst->OperGet() == GT_AND || dst->OperGet() == GT_OR || dst->OperGet() == GT_XOR) + else if (dst->OperIs(GT_AND, GT_AND_NOT, GT_OR, GT_XOR)) { emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - // NOTE: can/should amend: LOONGARCH needs to sign-extend dst when deal with 32bit data. + // TODO-LOONGARCH64-CQ: here sign-extend dst when deal with 32bit data is too conservative. 
if (EA_SIZE(attr) == EA_4BYTE) emitIns_R_R_I(INS_slli_w, attr, dst->GetRegNum(), dst->GetRegNum(), 0); } @@ -6713,8 +6687,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } } -L_Done: - return dst->GetRegNum(); } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 33adb5285ff1f..f661b0c8f1e69 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -660,14 +660,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un bool isHfaArg = false; var_types hfaType = TYP_UNDEF; -#if defined(TARGET_LOONGARCH64) - uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; - if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) - { - floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); - } -#endif - // Methods that use VarArg or SoftFP cannot have HFA arguments except // Native varargs on arm64 unix use the regular calling convention. if (((TargetOS::IsUnix && TargetArchitecture::IsArm64) || !info.compIsVarArgs) && !opts.compUseSoftFP) @@ -808,23 +800,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } #else // !TARGET_ARM -#if defined(TARGET_LOONGARCH64) - - if (compFeatureArgSplit()) - { - // This does not affect the normal calling convention for LoongArch64!! 
- if (this->info.compIsVarArgs && (argType == TYP_STRUCT)) - { - if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register - !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register - { - cSlotsToEnregister = 1; // Force the split - } - } - } - -#endif // defined(TARGET_LOONGARCH64) - #if defined(UNIX_AMD64_ABI) SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; if (varTypeIsStruct(argType)) @@ -886,9 +861,16 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - var_types arg1Type = TYP_UNKNOWN; - var_types arg2Type = TYP_UNKNOWN; - if (floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) + uint32_t floatFlags = STRUCT_NO_FLOAT_FIELD; + var_types argRegTypeInStruct1 = TYP_UNKNOWN; + var_types argRegTypeInStruct2 = TYP_UNKNOWN; + + if ((strip(corInfoType) == CORINFO_TYPE_VALUECLASS) && (argSize <= MAX_PASS_MULTIREG_BYTES)) + { + floatFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(typeHnd); + } + + if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0) { assert(varTypeIsStruct(argType)); int floatNum = 0; @@ -896,50 +878,56 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { assert(argSize <= 8); assert(varDsc->lvExactSize <= argSize); - floatNum = 1; - arg1Type = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; - canPassArgInRegisters = varDscInfo->canEnreg(arg1Type, 1); + floatNum = 1; + canPassArgInRegisters = varDscInfo->canEnreg(argRegTypeInStruct1, 1); + + argRegTypeInStruct1 = (varDsc->lvExactSize == 8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if ((floatFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { - arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; floatNum = 2; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 2); + + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } - else if (floatFlags & STRUCT_FLOAT_FIELD_FIRST) + else if ((floatFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) { floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; } - else if (floatFlags & STRUCT_FLOAT_FIELD_SECOND) + else if ((floatFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) { floatNum = 1; canPassArgInRegisters = varDscInfo->canEnreg(TYP_DOUBLE, 1); canPassArgInRegisters = canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1Type = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; - arg2Type = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + argRegTypeInStruct1 = (floatFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } if (!canPassArgInRegisters) { - assert(floatNum > 0); + assert((floatNum == 1) || (floatNum == 2)); // `if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0)` canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - arg1Type = TYP_UNKNOWN; - arg2Type = TYP_UNKNOWN; + + // On LoongArch64, there aren't even any remaining integer registers to pass the arguments. 
+ argRegTypeInStruct1 = TYP_UNKNOWN; + argRegTypeInStruct2 = TYP_UNKNOWN; } } else -#endif // defined(UNIX_AMD64_ABI) +#endif // defined(TARGET_LOONGARCH64) { canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); #if defined(TARGET_LOONGARCH64) + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. if (!canPassArgInRegisters && varTypeIsFloating(argType)) { canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, cSlotsToEnregister); @@ -947,8 +935,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } if (!canPassArgInRegisters && (cSlots > 1)) { + // If a struct-arg which needs two registers but only one integer register available, + // it has to be split. canPassArgInRegisters = varDscInfo->canEnreg(TYP_I_IMPL, 1); - arg1Type = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; + argRegTypeInStruct1 = canPassArgInRegisters ? TYP_I_IMPL : TYP_UNKNOWN; } #endif } @@ -981,9 +971,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un } else #elif defined(TARGET_LOONGARCH64) - if (arg1Type != TYP_UNKNOWN) + if (argRegTypeInStruct1 != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg1Type, 1); + firstAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct1, 1); } else #endif // defined(TARGET_LOONGARCH64) @@ -1037,18 +1027,20 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un #elif defined(TARGET_LOONGARCH64) if (argType == TYP_STRUCT) { - if (arg1Type != TYP_UNKNOWN) + if (argRegTypeInStruct1 != TYP_UNKNOWN) { - varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg1Type)); - varDsc->lvIs4Field1 = (int)emitActualTypeSize(arg1Type) == 4 ? 
1 : 0; - if (arg2Type != TYP_UNKNOWN) + varDsc->SetArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argRegTypeInStruct1)); + varDsc->lvIs4Field1 = (genTypeSize(argRegTypeInStruct1) == 4) ? 1 : 0; + if (argRegTypeInStruct2 != TYP_UNKNOWN) { - firstAllocatedRegArgNum = varDscInfo->allocRegArg(arg2Type, 1); - varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, arg2Type)); - varDsc->lvIs4Field2 = (int)emitActualTypeSize(arg2Type) == 4 ? 1 : 0; + unsigned secondAllocatedRegArgNum = varDscInfo->allocRegArg(argRegTypeInStruct2, 1); + varDsc->SetOtherArgReg(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, argRegTypeInStruct2)); + varDsc->lvIs4Field2 = (genTypeSize(argRegTypeInStruct2) == 4) ? 1 : 0; } else if (cSlots > 1) { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. But we reserved extra 8-bytes for the whole struct. varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); varDscInfo->setAllRegArgUsed(arg1Type); @@ -1190,18 +1182,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un varDscInfo->setAnyFloatStackArgs(); } -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // If we needed to use the stack in order to pass this argument then // record the fact that we have used up any remaining registers of this 'type' - // This prevents any 'backfilling' from occuring on ARM64 + // This prevents any 'backfilling' from occurring on ARM64/LoongArch64. 
// varDscInfo->setAllRegArgUsed(argType); -#elif defined(TARGET_LOONGARCH64) - - varDscInfo->setAllRegArgUsed(argType); - #endif // TARGET_XXX #if FEATURE_FASTTAILCALL @@ -5433,11 +5421,6 @@ void Compiler::lvaFixVirtualFrameOffsets() JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta); varDsc->SetStackOffset(varDsc->GetStackOffset() + delta); -#if defined(TARGET_LOONGARCH64) - if (varDsc->GetStackOffset() >= delta) - varDsc->SetStackOffset(varDsc->GetStackOffset() + (varDsc->lvIsSplit ? 8 : 0)); -#endif - #if DOUBLE_ALIGN if (genDoubleAlign() && !codeGen->isFramePointerUsed()) { @@ -6001,7 +5984,14 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } #elif defined(TARGET_LOONGARCH64) -// empty for LoongArch64. + + if (varDsc->lvIsSplit) + { + assert((varDsc->lvType == TYP_STRUCT) && (varDsc->GetOtherArgReg() == REG_STK)); + // This is a split struct. It will account for an extra (8 bytes) for the whole struct. + varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE); + argOffs += TARGET_POINTER_SIZE; + } #else // TARGET* #error Unsupported or unset target architecture diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 77deae14efc45..ddd36057acd6b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3232,12 +3232,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - if (relopOp1->IsCnsIntOrI() && relopOp2->IsCnsIntOrI()) - { - relopOp1->SetContained(); - relopOp2->SetContained(); - } - else if (relop->gtNext == jtrue) + if (relop->gtNext == jtrue) { if (relopOp2->IsCnsIntOrI()) { diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 9bd8beca15157..78ac528ba4c64 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -61,8 +61,6 @@ bool 
Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; - emitAttr attr = emitActualTypeSize(childNode->TypeGet()); - emitAttr size = EA_SIZE(attr); switch (parentNode->OperGet()) { @@ -84,7 +82,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_AND: case GT_OR: case GT_XOR: - return emitter::isValidUimm11(immVal); + return emitter::isValidUimm12(immVal); case GT_JCMP: assert(((parentNode->gtFlags & GTF_JCMP_TST) == 0) ? (immVal == 0) : isPow2(immVal)); return true; @@ -106,8 +104,10 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const //------------------------------------------------------------------------ // LowerMul: Lower a GT_MUL/GT_MULHI/GT_MUL_LONG node. // -// TODO: For LoongArch64 recognized GT_MULs that can be turned into GT_MUL_LONGs, as -// those are cheaper. Performs contaiment checks. +// Performs containment checks. +// +// TODO-LoongArch64-CQ: recognize GT_MULs that can be turned into MUL_LONGs, +// as those are cheaper. // // Arguments: // mul - The node to lower @@ -370,26 +370,13 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } //------------------------------------------------------------------------ -// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. // // Arguments: -// tree - GT_CAST node to be lowered -// -// Return Value: -// None. 
-// -// Notes: -// Casts from float/double to a smaller int type are transformed as follows: -// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) -// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) -// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) -// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) -// -// Note that for the overflow conversions we still depend on helper calls and -// don't expect to see them here. -// i) GT_CAST(float/double, int type with overflow detection) +// blkNode - the block store node +// size - the block size +// addr - the address node to try to contain // - void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr) { assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); @@ -429,6 +416,27 @@ void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenT addr->SetContained(); } +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. +// +// Notes: +// Casts from float/double to a smaller int type are transformed as follows: +// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) +// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) +// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) +// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) +// +// Note that for the overflow conversions we still depend on helper calls and +// don't expect to see them here. 
+// i) GT_CAST(float/double, int type with overflow detection) +// + void Lowering::LowerCast(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -440,7 +448,6 @@ void Lowering::LowerCast(GenTree* tree) GenTree* op1 = tree->AsOp()->gtOp1; var_types dstType = tree->CastToType(); var_types srcType = genActualType(op1->TypeGet()); - var_types tmpType = TYP_UNDEF; if (varTypeIsFloating(srcType)) { @@ -451,16 +458,6 @@ void Lowering::LowerCast(GenTree* tree) assert(!varTypeIsSmall(srcType)); - if (tmpType != TYP_UNDEF) - { - GenTree* tmp = comp->gtNewCastNode(tmpType, op1, tree->IsUnsigned(), tmpType); - tmp->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT)); - - tree->gtFlags &= ~GTF_UNSIGNED; - tree->AsOp()->gtOp1 = tmp; - BlockRange().InsertAfter(op1, tmp); - } - // Now determine if we have operands that should be contained. ContainCheckCast(tree->AsCast()); } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 826d89dd2a491..52d7191e528dc 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -257,6 +257,7 @@ int LinearScan::BuildNode(GenTree* tree) FALLTHROUGH; case GT_AND: + case GT_AND_NOT: case GT_OR: case GT_XOR: case GT_LSH: From 93e27c02ee37e088abe992bf2d19fd9f5c3f062c Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 1 Apr 2022 10:57:49 +0800 Subject: [PATCH 42/46] [LoongArch64] amend some code for CR round3. 
--- src/coreclr/jit/codegencommon.cpp | 10 +- src/coreclr/jit/codegenloongarch64.cpp | 85 ++------ src/coreclr/jit/compiler.h | 6 + src/coreclr/jit/lclvars.cpp | 21 +- src/coreclr/jit/lower.cpp | 63 ++---- src/coreclr/jit/lsraloongarch64.cpp | 35 +--- src/coreclr/jit/morph.cpp | 211 ++++++++------------ src/coreclr/jit/register_arg_convention.cpp | 11 +- 8 files changed, 134 insertions(+), 308 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 4eab2fbd6cffd..46f429f81b4f2 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4341,9 +4341,9 @@ void CodeGen::genEnregisterIncomingStackArgs() } } } -#else +#else // !TARGET_LOONGARCH64 GetEmitter()->emitIns_R_S(ins_Load(regType), emitTypeSize(regType), regNum, varNum, 0); -#endif // TARGET_LOONGARCH64 +#endif // !TARGET_LOONGARCH64 regSet.verifyRegUsed(regNum); #ifdef USING_SCOPE_INFO @@ -8142,6 +8142,8 @@ void CodeGen::genStructReturn(GenTree* treeNode) LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(varDsc->lvIsMultiRegRet); #ifdef TARGET_LOONGARCH64 + // On LoongArch64, for a struct like "{ int, double }", "retTypeDesc" will be "{ TYP_INT, TYP_DOUBLE }", + // i. e. not include the padding for the first field, and so the general loop below won't work. 
var_types type = retTypeDesc.GetReturnRegType(0); regNumber toReg = retTypeDesc.GetABIReturnReg(0); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), 0); @@ -8154,7 +8156,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) toReg = retTypeDesc.GetABIReturnReg(1); GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); } -#else +#else // !TARGET_LOONGARCH64 int offset = 0; for (unsigned i = 0; i < regCount; ++i) { @@ -8163,7 +8165,7 @@ void CodeGen::genStructReturn(GenTree* treeNode) GetEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), toReg, lclNode->GetLclNum(), offset); offset += genTypeSize(type); } -#endif +#endif // !TARGET_LOONGARCH64 } else { diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index e5b4be91ba713..40a19300095e9 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1259,12 +1259,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned saveRegsPlusPSPSize = roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + PSPSize; - if (compiler->info.compIsVarArgs) - { - // For varargs we always save all of the integer register arguments - // so that they are contiguous with the incoming stack arguments. - saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; - } unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); @@ -4853,8 +4847,7 @@ int CodeGenInterface::genTotalFrameSize() const assert(!IsUninitialized(compiler->compCalleeRegsPushed)); - int totalFrameSize = (compiler->info.compIsVarArgs ? 
MAX_REG_ARG * REGSIZE_BYTES : 0) + - compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; + int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; assert(totalFrameSize > 0); return totalFrameSize; @@ -5502,7 +5495,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: - genCodeForLclAddr(treeNode); + genCodeForLclAddr(treeNode->AsLclVarCommon()); break; case GT_LCL_FLD: @@ -6852,20 +6845,20 @@ void CodeGen::genCodeForShift(GenTree* tree) // Arguments: // tree - the node. // -void CodeGen::genCodeForLclAddr(GenTree* tree) +void CodeGen::genCodeForLclAddr(GenTreeLclVarCommon* lclAddrNode) { - assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); + assert(lclAddrNode->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR)); - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->GetRegNum(); + var_types targetType = lclAddrNode->TypeGet(); + emitAttr size = emitTypeSize(targetType); + regNumber targetReg = lclAddrNode->GetRegNum(); // Address of a local var. noway_assert((targetType == TYP_BYREF) || (targetType == TYP_I_IMPL)); - emitAttr size = emitTypeSize(targetType); + GetEmitter()->emitIns_R_S(INS_lea, size, targetReg, lclAddrNode->GetLclNum(), lclAddrNode->GetLclOffs()); - inst_RV_TT(INS_lea, targetReg, tree, 0, size); - genProduceReg(tree); + genProduceReg(lclAddrNode); } //------------------------------------------------------------------------ @@ -7758,8 +7751,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) #endif // Next move any un-enregistered register arguments back to their register. - regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. - unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. + unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. 
for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) { varDsc = compiler->lvaTable + varNum; @@ -7821,7 +7813,6 @@ void CodeGen::genJmpMethod(GenTree* jmp) regSet.AddMaskVars(genRegMask(argReg)); gcInfo.gcMarkRegPtrVal(argReg, loadType); - // if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) if (varDsc->GetOtherArgReg() < REG_STK) { // Restore the second register. @@ -7851,59 +7842,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (compiler->info.compIsVarArgs) { - NYI("unimplemented on LOONGARCH64 yet"); - // In case of a jmp call to a vararg method ensure only integer registers are passed. - assert((genRegMask(argReg) & (RBM_ARG_REGS)) != RBM_NONE); - assert(!varDsc->lvIsHfaRegArg()); - - fixedIntArgMask |= genRegMask(argReg); - - if (compiler->lvaIsMultiregStruct(varDsc, compiler->info.compIsVarArgs)) - { - assert(argRegNext != REG_NA); - fixedIntArgMask |= genRegMask(argRegNext); - } - - if (argReg == REG_ARG_0) - { - assert(firstArgVarNum == BAD_VAR_NUM); - firstArgVarNum = varNum; - } - } - } - - // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg, - // load the remaining integer arg registers from the corresponding - // shadow stack slots. This is for the reason that we don't know the number and type - // of non-fixed params passed by the caller, therefore we have to assume the worst case - // of caller passing all integer arg regs that can be max size of reg. - // - // The caller could have passed gc-ref/byref type var args. Since these are var args - // the callee no way of knowing their gc-ness. Therefore, mark the region that loads - // remaining arg registers from shadow stack slots as non-gc interruptible. 
- if (fixedIntArgMask != RBM_NONE) - { - assert(compiler->info.compIsVarArgs); - assert(firstArgVarNum != BAD_VAR_NUM); - - regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; - if (remainingIntArgMask != RBM_NONE) - { - GetEmitter()->emitDisableGC(); - for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) - { - regNumber argReg = intArgRegs[argNum]; - regMaskTP argRegMask = genRegMask(argReg); - - if ((remainingIntArgMask & argRegMask) != 0) - { - remainingIntArgMask &= ~argRegMask; - GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, argReg, firstArgVarNum, argOffset); - } - - argOffset += REGSIZE_BYTES; - } - GetEmitter()->emitEnableGC(); + NYI_LOONGARCH64("genJmpMethod unsupports compIsVarArgs"); } } } @@ -9000,7 +8939,7 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe if (compiler->info.compIsVarArgs) { JITDUMP(" compIsVarArgs=true\n"); - NYI_LOONGARCH64("genPushCalleeSavedRegisters - compIsVarArgs"); + NYI_LOONGARCH64("genPushCalleeSavedRegisters unsupports compIsVarArgs"); } #ifdef DEBUG diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2a0f894c8c1a8..8a1ab5809e679 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1696,6 +1696,12 @@ struct fgArgTabEntry unsigned numRegs; // Count of number of registers that this argument uses. // Note that on ARM, if we have a double hfa, this reflects the number // of DOUBLE registers. +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which has float field(s) and no more than two fields + // may be passed by float register(s). + // e.g `struct {int a; float b;}` passed by an integer register and a float register. 
+ var_types structFloatFieldType[2]; +#endif #if defined(UNIX_AMD64_ABI) // Unix amd64 will split floating point types and integer types in structs diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 61fabffaff304..115cc7953f46e 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -925,12 +925,14 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un argRegTypeInStruct2 = (floatFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } + assert((floatNum == 1) || (floatNum == 2)); + if (!canPassArgInRegisters) { - assert((floatNum == 1) || (floatNum == 2)); // `if ((floatFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) != 0)` + // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, + // integer registers (if any) are used instead. canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); - // On LoongArch64, there aren't even any remaining integer registers to pass the arguments. argRegTypeInStruct1 = TYP_UNKNOWN; argRegTypeInStruct2 = TYP_UNKNOWN; } @@ -1057,8 +1059,10 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un // it has to be split. But we reserved extra 8-bytes for the whole struct. varDsc->lvIsSplit = 1; varDsc->SetOtherArgReg(REG_STK); - varDscInfo->setAllRegArgUsed(arg1Type); + varDscInfo->setAllRegArgUsed(argRegTypeInStruct1); +#if FEATURE_FASTTAILCALL varDscInfo->stackArgSize += TARGET_POINTER_SIZE; +#endif } } else @@ -6314,13 +6318,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #elif defined(TARGET_LOONGARCH64) int initialStkOffs = 0; - if (info.compIsVarArgs) - { - // For varargs we always save all of the integer register arguments - // so that they are contiguous with the incoming stack arguments. - initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES; - stkOffs -= initialStkOffs; - } // Subtract off FP and RA. 
assert(compCalleeRegsPushed >= 2); @@ -6827,7 +6824,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() continue; } -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) { // Stack offset to varargs (parameters) should point to home area which will be preallocated. @@ -7087,7 +7084,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) if (info.compIsVarArgs) { pushedCount += MAX_REG_ARG; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 1460715b788aa..61a655ddc2cf9 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2833,7 +2833,6 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) assert(cmp->gtGetOp2()->IsIntegralConst()); #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - // TODO-LoongArch64: add optimize for LoongArch64. GenTree* op1 = cmp->gtGetOp1(); GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon(); ssize_t op2Value = op2->IconValue(); @@ -3190,7 +3189,7 @@ GenTree* Lowering::LowerCompare(GenTree* cmp) // GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) GenTree* relop = jtrue->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); @@ -3199,6 +3198,14 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) bool useJCMP = false; GenTreeFlags flags = GTF_EMPTY; +#if defined(TARGET_LOONGARCH64) + if (relop->OperIs(GT_EQ, GT_NE)) + { + // Codegen will use beq or bne. + flags = relop->OperIs(GT_EQ) ? 
GTF_JCMP_EQ : GTF_EMPTY; + useJCMP = true; + } +#else // TARGET_ARM64 if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0)) { // Codegen will use cbz or cbnz in codegen which do not affect the flag register @@ -3211,6 +3218,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : GTF_EMPTY); useJCMP = true; } +#endif // TARGET_ARM64 if (useJCMP) { @@ -3227,48 +3235,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) return nullptr; } } -#elif defined(TARGET_LOONGARCH64) - GenTree* relop = jtrue->gtGetOp1(); - GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); - GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - - if (relop->gtNext == jtrue) - { - if (relopOp2->IsCnsIntOrI()) - { - if (relop->OperIs(GT_EQ, GT_NE)) - { - - // Codegen will use beq or bne in codegen. - GenTreeFlags flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : GTF_EMPTY; - - relop->SetOper(GT_JCMP); - relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ); - relop->gtFlags |= flags; - relop->gtType = TYP_VOID; - - relopOp2->SetContained(); - - BlockRange().Remove(jtrue); - - assert(relop->gtNext == nullptr); - return nullptr; - } - } - else if (relopOp1->IsCnsIntOrI()) - { - relopOp1->SetContained(); - } - } - else if (relopOp1->IsCnsIntOrI()) - { - relopOp1->SetContained(); - } - else if (relopOp2->IsCnsIntOrI()) - { - relopOp2->SetContained(); - } -#endif // TARGET_LOONGARCH64 +#endif // TARGET_ARM64 || TARGET_LOONGARCH64 ContainCheckJTrue(jtrue); @@ -3981,9 +3948,9 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store) { #if defined(TARGET_LOONGARCH64) if (varTypeIsFloating(call->TypeGet())) + { regType = call->TypeGet(); - assert(regType != TYP_UNDEF); - assert(regType != TYP_STRUCT); + } #endif store->ChangeType(regType); store->SetOper(GT_STOREIND); @@ -5735,7 +5702,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) GenTree* firstNode = nullptr; GenTree* adjustedDividend = dividend; -#if defined(TARGET_ARM64) || 
defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) // On ARM64 we will use a 32x32->64 bit multiply instead of a 64x64->64 one. bool widenToNativeIntForMul = (type != TYP_I_IMPL) && !simpleMul; #else @@ -5789,7 +5756,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { -#if defined(TARGET_ARM64) +#ifdef TARGET_ARM64 // 64-bit MUL is more expensive than UMULL on ARM64. genTreeOps mulOper = simpleMul ? GT_MUL_LONG : GT_MULHI; #else diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 52d7191e528dc..aee2f9791b898 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -163,6 +163,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_COMMA: case GT_QMARK: case GT_COLON: + case GT_CLS_VAR: + case GT_ADDR: srcCount = 0; assert(dstCount == 0); unreached(); @@ -404,18 +406,6 @@ int LinearScan::BuildNode(GenTree* tree) } break; - case GT_ADDR: - { - // For a GT_ADDR, the child node should not be evaluated into a register - GenTree* child = tree->gtGetOp1(); - assert(!isCandidateLocalRef(child)); - assert(child->isContained()); - assert(dstCount == 1); - srcCount = 0; - BuildDef(tree); - } - break; - case GT_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); @@ -628,20 +618,6 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree, RBM_EXCEPTION_OBJECT); break; - case GT_CLS_VAR: - srcCount = 0; - // GT_CLS_VAR, by the time we reach the backend, must always - // be a pure use. - // It will produce a result of the type of the - // node, and use an internal register for the address. 
- - assert(dstCount == 1); - assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - BuildDef(tree); - break; - case GT_INDEX_ADDR: assert(dstCount == 1); srcCount = BuildBinaryUses(tree->AsOp()); @@ -1339,13 +1315,6 @@ int LinearScan::BuildCast(GenTreeCast* cast) const var_types srcType = genActualType(src->TypeGet()); const var_types castType = cast->gtCastType; - // Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to - // store the min and max immediate values that cannot be encoded in the CMP instruction. - if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT)) - { - buildInternalIntRegisterDefForNode(cast); - } - int srcCount = BuildOperandUses(src); buildInternalRegisterUses(); BuildDef(cast); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4b9911ca03559..cc6d4371f4ee2 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -932,8 +932,6 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, AddRegArg(argNum, node, use, regNum, numRegs, byteSize, byteAlignment, isStruct, false, isVararg); assert(curArgTabEntry != nullptr); - curArgTabEntry->isStruct = isStruct; // is this a struct arg - INDEBUG(curArgTabEntry->checkIsStruct();) assert(numRegs <= 2); if (numRegs == 2) @@ -2932,7 +2930,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif } -#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) assert(!callIsVararg || !isHfaArg); passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeUsesFloatReg(argx)); @@ -2945,13 +2943,16 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = false; +#elif defined(TARGET_LOONGARCH64) + + assert(!callIsVararg && !isHfaArg); + passUsingFloatRegs = varTypeUsesFloatReg(argx); + DWORD floatFieldFlags = STRUCT_NO_FLOAT_FIELD; + #else #error Unsupported or unset target 
architecture #endif // TARGET* -#if defined(TARGET_LOONGARCH64) - DWORD floatFieldFlags = 0; -#endif bool isBackFilled = false; unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use var_types structBaseType = TYP_STRUCT; @@ -3086,11 +3087,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; compFloatingPointUsed |= passUsingFloatRegs; - if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) { size = 1; } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { size = 2; } @@ -3269,18 +3270,18 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) } else if (passUsingFloatRegs) { - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + 1); } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) != 0) { assert(size == 1); size = 2; passUsingFloatRegs = false; nextOtherRegNum = genMapFloatRegArgNumToRegNum(nextFltArgRegNum); } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) { assert(size == 1); size = 2; @@ -3501,15 +3502,34 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) { structBaseType = structSize == 8 ? 
TYP_DOUBLE : TYP_FLOAT; fltArgRegNum += 1; + newArgEntry->structFloatFieldType[0] = structBaseType; } - else if (floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) + else if ((floatFieldFlags & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND)) != 0) { fltArgRegNum += 1; intArgRegNum += 1; + if ((floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) != 0) + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + } + else + { + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_LONG : TYP_INT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + } } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) + else if ((floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) != 0) { fltArgRegNum += 2; + newArgEntry->structFloatFieldType[0] = + (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + newArgEntry->structFloatFieldType[1] = + (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; } } #else @@ -4549,19 +4569,34 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry { assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE); BYTE gcPtrs[MAX_ARG_REG_COUNT]; - elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + elemCount = roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; +#ifdef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. 
+ if (fgEntryPtr->numRegs == 2) + { + elemCount = 2; + } +#endif for (unsigned inx = 0; inx < elemCount; inx++) { -#ifdef UNIX_AMD64_ABI +#if defined(UNIX_AMD64_ABI) if (gcPtrs[inx] == TYPE_GC_NONE) { type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx], fgEntryPtr->structDesc.eightByteSizes[inx]); } else -#endif // UNIX_AMD64_ABI +#elif defined(TARGET_LOONGARCH64) + if (fgEntryPtr->structFloatFieldType[inx] != TYP_UNDEF) + { + type[inx] = fgEntryPtr->structFloatFieldType[inx]; + } + else +#endif // TARGET_LOONGARCH64 { type[inx] = getJitGCType(gcPtrs[inx]); } @@ -4575,7 +4610,12 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and // lives in the stack frame or will be a promoted field. // +#ifndef TARGET_LOONGARCH64 + // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE + // may be passed by two registers. + // e.g `struct {int a; float b;}` passed by an integer register and a float register. structSize = elemCount * TARGET_POINTER_SIZE; +#endif } else // we must have a GT_OBJ { @@ -4879,65 +4919,23 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry // unsigned offset = baseOffset; newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); -#if defined(TARGET_LOONGARCH64) - uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + for (unsigned inx = 0; inx < elemCount; inx++) { - assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); - var_types tmp_type_1; - var_types tmp_type_2; - - compFloatingPointUsed = true; - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? 
TYP_DOUBLE : TYP_FLOAT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_FIRST) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? type[1] : TYP_INT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); + newArg->AddField(this, nextLclFld, offset, type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + // For LoongArch64's ABI, maybe there is a padding. + // e.g. `struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; } else +#endif { - NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_LCL_FLD,GT_LCL_VAR"); - tmp_type_1 = TYP_UNDEF; - tmp_type_2 = TYP_UNDEF; - } - elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; - - GenTree* nextLclFld = gtNewLclFldNode(varNum, tmp_type_1, offset); - newArg->AddField(this, nextLclFld, offset, tmp_type_1); - offset += elemSize; - nextLclFld = gtNewLclFldNode(varNum, tmp_type_2, offset); - newArg->AddField(this, nextLclFld, offset, tmp_type_2); - } - else - { - GenTree* nextLclFld = gtNewLclFldNode(varNum, type[0], offset); - newArg->AddField(this, nextLclFld, offset, type[0]); - - if (elemCount > 1) - { - assert(elemCount == 2); - elemSize = genTypeSize(type[1]); - nextLclFld = gtNewLclFldNode(varNum, type[1], offset + elemSize); - newArg->AddField(this, nextLclFld, offset + elemSize, type[1]); + offset += genTypeSize(type[inx]); } } -#else - for (unsigned inx = 0; inx < elemCount; inx++) - { - GenTree* nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); - newArg->AddField(this, nextLclFld, offset, type[inx]); - offset += genTypeSize(type[inx]); - } -#endif } // Are we passing a GT_OBJ struct? 
// @@ -4967,78 +4965,35 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); unsigned offset = 0; -#if defined(TARGET_LOONGARCH64) - uint32_t floatFieldFlags = info.compCompHnd->getLoongArch64PassStructInRegisterFlags(objClass); - if (floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) + for (unsigned inx = 0; inx < elemCount; inx++) { - assert((floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) > 1); - var_types tmp_type_1; - var_types tmp_type_2; - - compFloatingPointUsed = true; - if (floatFieldFlags & STRUCT_FLOAT_FIELD_ONLY_TWO) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) - { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? type[1] : TYP_INT; - } - else if (floatFieldFlags & STRUCT_FLOAT_FIELD_SECOND) + GenTree* curAddr = baseAddr; + if (offset != 0) { - tmp_type_1 = (floatFieldFlags & STRUCT_FIRST_FIELD_SIZE_IS8) ? type[0] : TYP_INT; - tmp_type_2 = (floatFieldFlags & STRUCT_SECOND_FIELD_SIZE_IS8) ? TYP_DOUBLE : TYP_FLOAT; + GenTree* baseAddrDup = gtCloneExpr(baseAddr); + noway_assert(baseAddrDup != nullptr); + curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); } else { - NYI_LOONGARCH64("fgMorphMultiregStructArg -- GT_OBJ struct"); - tmp_type_1 = TYP_UNDEF; - tmp_type_2 = TYP_UNDEF; + curAddr = baseAddr; } - elemSize = (floatFieldFlags & STRUCT_HAS_8BYTES_FIELDS_MASK) ? 8 : 4; + GenTree* curItem = gtNewIndir(type[inx], curAddr); - GenTree* curItem = gtNewIndir(tmp_type_1, baseAddr); // For safety all GT_IND should have at least GT_GLOB_REF set. 
curItem->gtFlags |= GTF_GLOB_REF; - newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(); - newArg->AddField(this, curItem, 0, tmp_type_1); - - // GenTree* curAddr = baseAddr; - baseAddr = gtCloneExpr(baseAddr); - noway_assert(baseAddr != nullptr); - baseAddr = gtNewOperNode(GT_ADD, addrType, baseAddr, gtNewIconNode(elemSize, TYP_I_IMPL)); - - curItem = gtNewIndir(tmp_type_2, baseAddr); - // For safety all GT_IND should have at least GT_GLOB_REF set. - curItem->gtFlags |= GTF_GLOB_REF; - - newArg->AddField(this, curItem, elemSize, tmp_type_2); - } - else + newArg->AddField(this, curItem, offset, type[inx]); +#ifdef TARGET_LOONGARCH64 + if (structSize > TARGET_POINTER_SIZE) + { + // For LoongArch64's ABI, maybe there is a padding. + // e.g. `struct {float a; long b;}` + offset += TARGET_POINTER_SIZE; + } + else #endif - { - for (unsigned inx = 0; inx < elemCount; inx++) { - GenTree* curAddr = baseAddr; - if (offset != 0) - { - GenTree* baseAddrDup = gtCloneExpr(baseAddr); - noway_assert(baseAddrDup != nullptr); - curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL)); - } - else - { - curAddr = baseAddr; - } - GenTree* curItem = gtNewIndir(type[inx], curAddr); - - // For safety all GT_IND should have at least GT_GLOB_REF set. - curItem->gtFlags |= GTF_GLOB_REF; - - newArg->AddField(this, curItem, offset, type[inx]); offset += genTypeSize(type[inx]); } } diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index 1b5d1839b5e4c..bcc3dbc87e471 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -43,16 +43,7 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); #elif defined(TARGET_LOONGARCH64) - // LA-ABI64. 
- if (numRegs > MAX_PASS_MULTIREG_BYTES / TARGET_POINTER_SIZE) - { - assert(varTypeIsStruct(type)); - nextReg(TYP_INT, 1); // TYP_BYREF - } - else - { - nextReg(type, numRegs); - } + nextReg(type, numRegs); #else // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. nextReg(type, numRegs); From c0bbc8a0edc7822c7170c50a4fe05d4ada6ae646 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 1 Apr 2022 17:09:08 +0800 Subject: [PATCH 43/46] [LoongArch64] amend some code for CR round4. --- src/coreclr/jit/lclvars.cpp | 18 ++++++++++++++---- src/coreclr/jit/lower.cpp | 4 ++-- src/coreclr/jit/lsraloongarch64.cpp | 9 ++------- src/coreclr/jit/morph.cpp | 11 ++++++----- src/coreclr/jit/register_arg_convention.cpp | 2 -- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 115cc7953f46e..7cb32eac9f9ff 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -931,10 +931,22 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un { // On LoongArch64, if there aren't any remaining floating-point registers to pass the argument, // integer registers (if any) are used instead. + varDscInfo->setAllRegArgUsed(TYP_DOUBLE); canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister); argRegTypeInStruct1 = TYP_UNKNOWN; argRegTypeInStruct2 = TYP_UNKNOWN; + + if (cSlotsToEnregister == 2) + { + if (!canPassArgInRegisters && varDscInfo->canEnreg(TYP_I_IMPL, 1)) + { + // Here a struct-arg which needs two registers but only one integer register available, + // it has to be split. + argRegTypeInStruct1 = TYP_I_IMPL; + canPassArgInRegisters = true; + } + } } } else @@ -6317,8 +6329,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #elif defined(TARGET_LOONGARCH64) - int initialStkOffs = 0; - // Subtract off FP and RA. 
assert(compCalleeRegsPushed >= 2); stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES; @@ -6824,7 +6834,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() continue; } -#if defined(TARGET_ARM64) +#ifdef TARGET_ARM64 if (info.compIsVarArgs && varDsc->GetArgReg() != theFixedRetBuffArgNum()) { // Stack offset to varargs (parameters) should point to home area which will be preallocated. @@ -7084,7 +7094,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'. int pushedCount = compCalleeRegsPushed; -#if defined(TARGET_ARM64) +#ifdef TARGET_ARM64 if (info.compIsVarArgs) { pushedCount += MAX_REG_ARG; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 61a655ddc2cf9..9e96abc04d727 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -5494,7 +5494,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return next; } -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) +#ifdef TARGET_XARCH if (BlockRange().TryGetUse(node, &use)) { // If this is a child of an indir, let the parent handle it. 
@@ -5505,7 +5505,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) TryCreateAddrMode(node, false, parent); } } -#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 +#endif // TARGET_XARCH } if (node->OperIs(GT_ADD)) diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index aee2f9791b898..e51e1d8ca0e56 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1310,14 +1310,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // int LinearScan::BuildCast(GenTreeCast* cast) { - GenTree* src = cast->gtGetOp1(); - - const var_types srcType = genActualType(src->TypeGet()); - const var_types castType = cast->gtCastType; - - int srcCount = BuildOperandUses(src); - buildInternalRegisterUses(); + int srcCount = BuildOperandUses(cast->CastOp()); BuildDef(cast); + return srcCount; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index cc6d4371f4ee2..d1234696f77af 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -4591,7 +4591,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else #elif defined(TARGET_LOONGARCH64) - if (fgEntryPtr->structFloatFieldType[inx] != TYP_UNDEF) + if (varTypeIsFloating(fgEntryPtr->structFloatFieldType[inx]) || + (genTypeSize(fgEntryPtr->structFloatFieldType[inx]) == 4)) { type[inx] = fgEntryPtr->structFloatFieldType[inx]; } @@ -4606,10 +4607,10 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) { elemSize = TARGET_POINTER_SIZE; - // We can safely widen this to aligned bytes since we are loading from - // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and - // lives in the stack frame or will be a promoted field. 
- // +// We can safely widen this to aligned bytes since we are loading from +// a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and +// lives in the stack frame or will be a promoted field. +// #ifndef TARGET_LOONGARCH64 // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE // may be passed by two registers. diff --git a/src/coreclr/jit/register_arg_convention.cpp b/src/coreclr/jit/register_arg_convention.cpp index bcc3dbc87e471..a90e61c3a59fd 100644 --- a/src/coreclr/jit/register_arg_convention.cpp +++ b/src/coreclr/jit/register_arg_convention.cpp @@ -42,8 +42,6 @@ unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */) // For System V the reg type counters should be independent. nextReg(TYP_INT, numRegs); nextReg(TYP_FLOAT, numRegs); -#elif defined(TARGET_LOONGARCH64) - nextReg(type, numRegs); #else // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated. nextReg(type, numRegs); From d57ddb537cd0ee1a04d112724296921679bb3aac Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 1 Apr 2022 21:35:22 +0800 Subject: [PATCH 44/46] [LoongArch64] amend some code for CR round5. 
--- src/coreclr/jit/codegenloongarch64.cpp | 79 -------------------------- src/coreclr/jit/morph.cpp | 36 +++++++----- 2 files changed, 23 insertions(+), 92 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 40a19300095e9..15d4f78de4704 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4066,85 +4066,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R(INS_mov, EA_PTRSIZE, targetReg, REG_R0); emit->emitIns_R_I(INS_movcf2gr, EA_PTRSIZE, targetReg, 1 /*cc*/); } - else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) - { - ssize_t imm1 = op1->AsIntCon()->gtIconVal; - ssize_t imm2 = op2->AsIntCon()->gtIconVal; - - assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - - bool IsUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0; - instruction ins = INS_beqz; - - switch (cmpSize) - { - case EA_4BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - case EA_8BYTE: - break; - case EA_1BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpCompare."); - } - - switch (tree->OperGet()) - { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - 
break; - default: - break; - } - - assert(ins != INS_invalid); - jtree->gtOp2 = (GenTree*)REG_SP; - jtree->SetRegNum((regNumber)ins); - } else { if (op1->isContainedIntOrIImmed()) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d1234696f77af..2b9f7ebf80c8c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2024,7 +2024,12 @@ void fgArgInfo::EvalArgsToTemps() setupArg = compiler->fgMorphCopyBlock(setupArg); #if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) #if defined(TARGET_LOONGARCH64) - // For LoongArch64, the struct {float a; float b;} passed by float-registers. + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. if ((lclVarType == TYP_STRUCT) && (curArgTabEntry->numRegs == 1)) #else if (lclVarType == TYP_STRUCT) @@ -3087,6 +3092,17 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) passUsingFloatRegs = (floatFieldFlags & STRUCT_HAS_FLOAT_FIELDS_MASK) ? true : false; compFloatingPointUsed |= passUsingFloatRegs; + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) != 0) + { + // On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG" + // for "struct { float, float }", and retyping to a primitive here will cause the + // multi-reg morphing to not kick in (the struct in question needs to be passed in + // two FP registers). Here is just keep "structBaseType" as "TYP_STRUCT". + // TODO-LoongArch64: fix "getPrimitiveTypeForStruct" or use the ABI information in + // the arg entry instead of calling it here. 
+ structBaseType = TYP_STRUCT; + } + if ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_TWO)) != 0) { size = 1; @@ -3615,14 +3631,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (newArgEntry->isStruct) { newArgEntry->passedByRef = passStructByRef; -#if defined(TARGET_LOONGARCH64) - newArgEntry->argType = ((floatFieldFlags & (STRUCT_HAS_FLOAT_FIELDS_MASK ^ STRUCT_FLOAT_FIELD_ONLY_ONE)) || - (structBaseType == TYP_UNKNOWN)) - ? argx->TypeGet() - : structBaseType; -#else - newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; -#endif + newArgEntry->argType = (structBaseType == TYP_UNKNOWN) ? argx->TypeGet() : structBaseType; } else { @@ -4607,10 +4616,11 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) { elemSize = TARGET_POINTER_SIZE; -// We can safely widen this to aligned bytes since we are loading from -// a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and -// lives in the stack frame or will be a promoted field. -// + // We can safely widen this to aligned bytes since we are loading from + // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and + // lives in the stack frame or will be a promoted field. + CLANG_FORMAT_COMMENT_ANCHOR; + #ifndef TARGET_LOONGARCH64 // For LoongArch64's ABI, the struct which size is TARGET_POINTER_SIZE // may be passed by two registers. From ae3fbc02d2406006749b5c4e9c5c1e0b6bf9fa35 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Sun, 3 Apr 2022 01:08:54 +0800 Subject: [PATCH 45/46] [LoongArch64] amend some code after refacting. 
--- src/coreclr/jit/codegenloongarch64.cpp | 124 ++++--------------------- src/coreclr/jit/emitloongarch64.cpp | 5 +- src/coreclr/jit/instr.cpp | 14 ++- src/coreclr/jit/lsraloongarch64.cpp | 6 ++ 4 files changed, 40 insertions(+), 109 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 15d4f78de4704..9f74cef09f096 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4100,23 +4100,21 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) { ssize_t imm = op2->AsIntCon()->gtIconVal; + switch (cmpSize) { - switch (cmpSize) - { - case EA_4BYTE: - imm = static_cast(imm); - break; - case EA_8BYTE: - break; - case EA_1BYTE: - imm = static_cast(imm); - break; - // case EA_2BYTE: - // imm = static_cast(imm); - // break; - default: - assert(!"Unexpected type in jumpTrue(imm)."); - } + case EA_4BYTE: + imm = static_cast(imm); + break; + case EA_8BYTE: + break; + case EA_1BYTE: + imm = static_cast(imm); + break; + // case EA_2BYTE: + // imm = static_cast(imm); + // break; + default: + assert(!"Unexpected type in jumpTrue(imm)."); } if (tree->OperIs(GT_LT)) @@ -4222,8 +4220,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } } - - genProduceReg(tree); } else { @@ -4274,9 +4270,8 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R_R(INS_xor, EA_PTRSIZE, targetReg, regOp1, regOp2); emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } - - genProduceReg(tree); } + genProduceReg(tree); } } @@ -4399,87 +4394,6 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) jtrue->SetRegNum((regNumber)ins); } } - else if (op1->isContainedIntOrIImmed() && op2->isContainedIntOrIImmed()) - { - ssize_t imm1 = op1->AsIntCon()->gtIconVal; - ssize_t imm2 = op2->AsIntCon()->gtIconVal; - - assert(tree->OperIs(GT_LT, GT_LE, GT_EQ, GT_NE, GT_GT, GT_GE)); - - bool IsUnsigned = (tree->gtFlags & 
GTF_UNSIGNED) != 0; - - switch (cmpSize) - { - case EA_4BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - case EA_8BYTE: - break; - case EA_1BYTE: - { - imm1 = static_cast(imm1); - imm2 = static_cast(imm2); - } - break; - - default: - assert(!"Unexpected type in jumpTrue."); - } - switch (tree->OperGet()) - { - case GT_LT: - if (((!IsUnsigned) && (imm1 < imm2)) || ((IsUnsigned) && ((unsigned)imm1 < (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_LE: - if (((!IsUnsigned) && (imm1 <= imm2)) || ((IsUnsigned) && ((unsigned)imm1 <= (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_EQ: - if (imm1 == imm2) - { - ins = INS_b; - } - break; - case GT_NE: - if (imm1 != imm2) - { - ins = INS_b; - } - break; - case GT_GT: - if (((!IsUnsigned) && (imm1 > imm2)) || ((IsUnsigned) && ((unsigned)imm1 > (unsigned)imm2))) - { - ins = INS_b; - } - break; - case GT_GE: - if (((!IsUnsigned) && (imm1 >= imm2)) || ((IsUnsigned) && ((unsigned)imm1 >= (unsigned)imm2))) - { - ins = INS_b; - } - break; - default: - break; - } - - if (IsEq && (ins != INS_invalid)) - { - emit->emitIns_J(ins, compiler->compCurBB->bbJumpDest, 0); // 5-bits; - } - else if (ins != INS_invalid) - { - jtrue->gtOp2 = (GenTree*)(uint64_t)SaveCcResultReg; - jtrue->SetRegNum((regNumber)ins); - } - } else { if (op1->isContainedIntOrIImmed()) @@ -7229,8 +7143,8 @@ void CodeGen::genCall(GenTreeCall* call) assert(putArgRegNode->gtOper == GT_PUTARG_REG); genConsumeReg(putArgRegNode); - inst_Mov_Extend(putArgRegNode->TypeGet(), /* srcInReg */ true, argReg, putArgRegNode->GetRegNum(), - /* canSkip */ true, emitActualTypeSize(TYP_I_IMPL)); + var_types dstType = emitter::isFloatReg(argReg) ? 
TYP_DOUBLE : TYP_I_IMPL; + inst_Mov(dstType, argReg, putArgRegNode->GetRegNum(), /* canSkip */ true); argReg = genRegArgNext(argReg); } @@ -7243,8 +7157,8 @@ void CodeGen::genCall(GenTreeCall* call) { regNumber argReg = curArgTabEntry->GetRegNum(); genConsumeReg(argNode); - inst_Mov_Extend(argNode->TypeGet(), /* srcInReg */ true, argReg, argNode->GetRegNum(), /* canSkip */ true, - emitActualTypeSize(TYP_I_IMPL)); + var_types dstType = emitter::isFloatReg(argReg) ? TYP_DOUBLE : TYP_I_IMPL; + inst_Mov(dstType, argReg, argNode->GetRegNum(), /* canSkip */ true); } } diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 0905579406548..1cdd8b7fd8b89 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -2534,6 +2534,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code |= (code_t)id->idReg4(); code |= (code_t)id->idReg3() << 5; // the offset default is 0; + *(code_t*)dst = code; } else if (id->idIsReloc()) { @@ -2602,6 +2603,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code |= (code_t)reg2; code |= (code_t)REG_T2 << 5; // the offset default is 0; + *(code_t*)dst = code; } dst += 4; @@ -3290,7 +3292,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(INS_lu32i_d); code |= (code_t)reg1; - code |= (code_t)(imm >> 32) << 5; + code |= (code_t)((imm >> 32) & 0xfffff) << 5; *(code_t*)dst = code; dst += 4; @@ -3751,7 +3753,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // case INS_OPTS_NONE: default: - // assert(id->idGCref() == GCT_NONE); *(code_t*)dst = id->idAddr()->iiaGetInstrEncode(); dst += 4; dst2 = dst; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index b545df3649765..67ae437f03b75 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -447,15 +447,25 @@ void CodeGen::inst_Mov(var_types dstType, if 
(isFloatRegType(dstType) != genIsValidFloatReg(dstReg)) { if (dstType == TYP_FLOAT) + { dstType = TYP_INT; + } else if (dstType == TYP_DOUBLE) + { dstType = TYP_LONG; + } else if (dstType == TYP_INT) + { dstType = TYP_FLOAT; + } else if (dstType == TYP_LONG) + { dstType = TYP_DOUBLE; + } else - assert(!"unimplemented on LOONGARCH yet"); + { + NYI_LOONGARCH64("CodeGen::inst_Mov dstType"); + } } #endif instruction ins = ins_Copy(srcReg, dstType); @@ -1640,7 +1650,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return INS_vmov_f2i; } #elif defined(TARGET_LOONGARCH64) - // No SIMD support yet. + // TODO-LoongArch64-CQ: supporting SIMD. assert(!varTypeIsSIMD(dstType)); if (dstIsFloatReg) { diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index e51e1d8ca0e56..ca0efc8bea169 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -355,6 +355,12 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GE: case GT_GT: case GT_JCMP: + if (!varTypeIsFloating(tree)) + { + buildInternalIntRegisterDefForNode(tree); + buildInternalIntRegisterDefForNode(tree); + buildInternalRegisterUses(); + } srcCount = BuildCmp(tree); break; From 4b8a5968f107508d15013c4551ddd3fc9f1ede19 Mon Sep 17 00:00:00 2001 From: qiaopengcheng Date: Fri, 8 Apr 2022 10:24:08 +0800 Subject: [PATCH 46/46] [LoongArch64] amend the compare and fix the error when running hello-world within debug-mode. 
--- src/coreclr/jit/codegencommon.cpp | 4 ++-- src/coreclr/jit/codegenloongarch64.cpp | 30 +++++++++++--------------- src/coreclr/jit/emit.cpp | 6 +++++- src/coreclr/jit/emitloongarch64.cpp | 2 +- src/coreclr/jit/lsraloongarch64.cpp | 12 +++++------ src/coreclr/jit/morph.cpp | 5 +++++ 6 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 36c62a7c23643..0eccb2abfc8e5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1313,7 +1313,7 @@ bool CodeGen::genCreateAddrMode( noway_assert(op2); switch (op2->gtOper) { -#if !defined(TARGET_ARMARCH) +#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) // TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because // arm doesn't support both scale and offset at the same. Offset is handled // at the emitter as a peephole optimization. @@ -1370,7 +1370,7 @@ bool CodeGen::genCreateAddrMode( goto FOUND_AM; } break; -#endif // TARGET_ARMARCH +#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 case GT_NOP: diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 60f4f405fc582..afe5b0b95d5bd 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -4223,23 +4223,18 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) } else { - regNumber tmpRegOp1 = tree->ExtractTempReg(); - regNumber tmpRegOp2 = tree->ExtractTempReg(); - regNumber regOp2 = op2->GetRegNum(); - if (cmpSize == EA_4BYTE) + regNumber regOp2 = op2->GetRegNum(); + + if ((cmpSize == EA_4BYTE) && IsUnsigned) { + regNumber tmpRegOp1 = REG_RA; + regNumber tmpRegOp2 = rsGetRsvdReg(); + + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, regOp1, 0); + emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, regOp2, 0); + regOp1 = tmpRegOp1; regOp2 = tmpRegOp2; - if (IsUnsigned) - { - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp1, 
op1->GetRegNum(), 31, 0); - emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_PTRSIZE, tmpRegOp2, op2->GetRegNum(), 31, 0); - } - else - { - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp1, op1->GetRegNum(), 0); - emit->emitIns_R_R_I(INS_slli_w, EA_4BYTE, tmpRegOp2, op2->GetRegNum(), 0); - } } if (tree->OperIs(GT_LT)) @@ -4271,7 +4266,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* jtree) emit->emitIns_R_R_I(INS_sltui, EA_PTRSIZE, targetReg, targetReg, 1); } } - genProduceReg(tree); } } @@ -4497,7 +4491,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) if (IsUnsigned && cmpSize == EA_4BYTE && op2->OperIs(GT_LCL_VAR) && compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()) { - regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); regOp1 = tmpRegOp1; @@ -4506,7 +4500,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) else if (IsUnsigned && cmpSize == EA_4BYTE && op1->OperIs(GT_LCL_VAR) && compiler->lvaTable[op1->AsLclVar()->GetLclNum()].lvIsRegCandidate()) { - regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, REG_RA, regOp2, 31, 0); regOp1 = tmpRegOp1; @@ -4522,7 +4516,7 @@ void CodeGen::genCodeForJumpTrue(GenTreeOp* jtrue) { if (!(op1->gtFlags & GTF_UNSIGNED)) { - regNumber tmpRegOp1 = tree->ExtractTempReg(); + regNumber tmpRegOp1 = rsGetRsvdReg(); emit->emitIns_R_R_I_I(INS_bstrpick_d, EA_8BYTE, tmpRegOp1, regOp1, 31, 0); regOp1 = tmpRegOp1; } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index c372e44db0414..ba73a2f8e09f3 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6552,7 +6552,11 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } -#endif // TARGET_XARCH +#elif 
defined(TARGET_LOONGARCH64) + + isJccAffectedIns = true; + +#endif // TARGET_LOONGARCH64 // Jcc affected instruction boundaries were printed above; handle other cases here. if (!isJccAffectedIns) diff --git a/src/coreclr/jit/emitloongarch64.cpp b/src/coreclr/jit/emitloongarch64.cpp index 1cdd8b7fd8b89..9fb3e1f9cac1c 100644 --- a/src/coreclr/jit/emitloongarch64.cpp +++ b/src/coreclr/jit/emitloongarch64.cpp @@ -786,7 +786,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va imm3 = imm3 ? imm2 - imm3 : imm2; code = emitInsCode(ins); code |= (code_t)reg1; - code |= (code_t)REG_RA; + code |= (code_t)REG_RA << 5; code |= (code_t)(imm3 & 0xfff) << 10; } } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index ca0efc8bea169..2f259f7efffbd 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -256,6 +256,12 @@ int LinearScan::BuildNode(GenTree* tree) // everything is made explicit by adding casts. assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet()); } + else if (tree->gtOverflow()) + { + // Need a register different from target reg to check for overflow. 
+ buildInternalIntRegisterDefForNode(tree); + setInternalRegsDelayFree = true; + } FALLTHROUGH; case GT_AND: @@ -355,12 +361,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_GE: case GT_GT: case GT_JCMP: - if (!varTypeIsFloating(tree)) - { - buildInternalIntRegisterDefForNode(tree); - buildInternalIntRegisterDefForNode(tree); - buildInternalRegisterUses(); - } srcCount = BuildCmp(tree); break; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3d4bf56ceb391..19deba41f7657 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -862,6 +862,11 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); curArgTabEntry->SetByteOffset(0); +#ifdef TARGET_LOONGARCH64 + curArgTabEntry->structFloatFieldType[0] = TYP_UNDEF; + curArgTabEntry->structFloatFieldType[1] = TYP_UNDEF; +#endif + hasRegArgs = true; if (argCount >= argTableSize) {