From 8cb3c6187f9e1338f22f9e37c4dfe615123eaa59 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 29 Nov 2023 23:26:28 +0000 Subject: [PATCH] Add Arm64 encodings for IF_SVE_AF_3A to IF_SVE_AQ_3A (#95337) * Add Arm64 encodings for IF_SVE_AF_3A to IF_SVE_AQ_3A * INS_OPTS_SCALABLE_n_TO_SIMD_SCALAR Change-Id: I068d2761af40d22f8850e64d136a81275ce4ca5f * placeholder * Block out unsupported instructions * nits --- src/coreclr/jit/codegenarm64.cpp | 127 ++++++++++- src/coreclr/jit/emitarm64.cpp | 368 ++++++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.h | 41 +++- src/coreclr/jit/instr.h | 13 +- 4 files changed, 500 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 11994ff95f88d..46346c4cd7ffc 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5437,6 +5437,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // #define ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // #define ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE +// #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED #if defined(DEBUG) void CodeGen::genArm64EmitterUnitTests() @@ -10194,11 +10195,11 @@ void CodeGen::genArm64EmitterUnitTests() // IF_SVE_CN_3A theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, - INS_OPTS_SCALABLE_H_TO_SIMD); /* CLASTA , , , . */ + INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR); /* CLASTA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, - INS_OPTS_SCALABLE_S_TO_SIMD); /* CLASTB , , , . */ + INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR); /* CLASTB , , , . */ theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_V14, REG_P0, REG_V17, - INS_OPTS_SCALABLE_D_TO_SIMD); /* CLASTB , , , . */ + INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR); /* CLASTB , , , . 
*/ // IF_SVE_CO_3A // Note: EA_4BYTE used for B and H (destination register is W) @@ -10299,21 +10300,23 @@ void CodeGen::genArm64EmitterUnitTests() // IF_SVE_HJ_3A theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14, - INS_OPTS_SCALABLE_H_TO_SIMD); /* FADDA , , , . */ + INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR); /* FADDA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, - INS_OPTS_SCALABLE_S_TO_SIMD); /* FADDA , , , . */ + INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR); /* FADDA , , , . */ theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, - INS_OPTS_SCALABLE_D_TO_SIMD); /* FADDA , , , . */ + INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR); /* FADDA , , , . */ + // IF_SVE_HL_3A theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, INS_OPTS_SCALABLE_H); /* FABD ., /M, ., . */ theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, INS_OPTS_SCALABLE_S); /* FADD ., /M, ., . */ - // These are not yet supported by capstone. - // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); /* FAMAX ., /M, ., . */ - // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); - /* FAMIN ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); +/* FAMIN ., /M, ., . */ +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, INS_OPTS_SCALABLE_S); /* FDIV ., /M, ., . 
*/ theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, @@ -10337,6 +10340,110 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, INS_OPTS_SCALABLE_D); /* FSUBR ., /M, ., . */ + // IF_SVE_AF_3A + theEmitter->emitIns_R_R_R(INS_sve_andv, EA_1BYTE, REG_V0, REG_P0, REG_V0, + INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR); /* ANDV , , . */ + theEmitter->emitIns_R_R_R(INS_sve_eorv, EA_2BYTE, REG_V1, REG_P1, REG_V1, + INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR); /* EORV , , . */ + theEmitter->emitIns_R_R_R(INS_sve_orv, EA_4BYTE, REG_V2, REG_P2, REG_V2, + INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR); /* ORV , , . */ + theEmitter->emitIns_R_R_R(INS_sve_orv, EA_8BYTE, REG_V3, REG_P3, REG_V3, + INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR); /* ORV , , . */ + +// IF_SVE_AG_3A +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + theEmitter->emitIns_R_R_R(INS_sve_andqv, EA_1BYTE, REG_V4, REG_P4, REG_V4, INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR); + /* ANDQV ., , . */ + theEmitter->emitIns_R_R_R(INS_sve_eorqv, EA_2BYTE, REG_V5, REG_P5, REG_V5, INS_OPTS_SCALABLE_H_TO_SIMD_VECTOR); + /* EORQV ., , . */ + theEmitter->emitIns_R_R_R(INS_sve_orqv, EA_4BYTE, REG_V6, REG_P6, REG_V6, INS_OPTS_SCALABLE_S_TO_SIMD_VECTOR); + /* ORQV ., , . */ + theEmitter->emitIns_R_R_R(INS_sve_orqv, EA_8BYTE, REG_V7, REG_P7, REG_V7, INS_OPTS_SCALABLE_D_TO_SIMD_VECTOR); +/* ORQV ., , . */ +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + + // IF_SVE_AI_3A + theEmitter->emitIns_R_R_R(INS_sve_saddv, EA_1BYTE, REG_V1, REG_P4, REG_V2, + INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR); /* SADDV
<Dd>, <Pg>, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_saddv, EA_2BYTE, REG_V2, REG_P5, REG_V3, + INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR); /* SADDV
<Dd>, <Pg>, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uaddv, EA_4BYTE, REG_V3, REG_P6, REG_V4, + INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR); /* UADDV
<Dd>, <Pg>, <Zn>.<T> */ + +// IF_SVE_AJ_3A +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + theEmitter->emitIns_R_R_R(INS_sve_addqv, EA_8BYTE, REG_V21, REG_V7, REG_P22, INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR); +/* ADDQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */ +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + + // IF_SVE_AK_3A + theEmitter->emitIns_R_R_R(INS_sve_smaxv, EA_8BYTE, REG_V15, REG_P7, REG_V4, + INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR); /* SMAXV <V><d>, <Pg>, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sminv, EA_4BYTE, REG_V16, REG_P6, REG_V14, + INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR); /* SMINV <V><d>, <Pg>, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_umaxv, EA_2BYTE, REG_V17, REG_P5, REG_V24, + INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR); /* UMAXV <V><d>, <Pg>, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uminv, EA_1BYTE, REG_V18, REG_P4, REG_V31, + INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR); /* UMINV <V><d>, <Pg>, <Zn>.<T> */ + +// IF_SVE_AL_3A +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + theEmitter->emitIns_R_R_R(INS_sve_smaxqv, EA_1BYTE, REG_V0, REG_P5, REG_V25, INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR); + /* SMAXQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */ + theEmitter->emitIns_R_R_R(INS_sve_sminqv, EA_2BYTE, REG_V1, REG_P4, REG_V24, INS_OPTS_SCALABLE_H_TO_SIMD_VECTOR); + /* SMINQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */ + theEmitter->emitIns_R_R_R(INS_sve_umaxqv, EA_4BYTE, REG_V2, REG_P3, REG_V23, INS_OPTS_SCALABLE_S_TO_SIMD_VECTOR); + /* UMAXQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */ + theEmitter->emitIns_R_R_R(INS_sve_uminqv, EA_8BYTE, REG_V3, REG_P2, REG_V22, INS_OPTS_SCALABLE_D_TO_SIMD_VECTOR); +/* UMINQV <Vd>.<T>, <Pg>, <Zn>.<Tb> */ +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + + // IF_SVE_AP_3A + theEmitter->emitIns_R_R_R(INS_sve_cls, EA_SCALABLE, REG_V31, REG_P0, REG_V0, + INS_OPTS_SCALABLE_B); /* CLS <Zd>.<T>, <Pg>/M, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_clz, EA_SCALABLE, REG_V30, REG_P1, REG_V1, + INS_OPTS_SCALABLE_H); /* CLZ <Zd>.<T>, <Pg>/M, <Zn>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_cnot, EA_SCALABLE, REG_V29, REG_P2, REG_V2, + INS_OPTS_SCALABLE_S); /* CNOT <Zd>.<T>, <Pg>/M, <Zn>.<T> 
*/ + theEmitter->emitIns_R_R_R(INS_sve_cnt, EA_SCALABLE, REG_V28, REG_P3, REG_V3, + INS_OPTS_SCALABLE_D); /* CNT ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_fabs, EA_SCALABLE, REG_V27, REG_P4, REG_V4, + INS_OPTS_SCALABLE_H); /* FABS ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_fneg, EA_SCALABLE, REG_V26, REG_P5, REG_V5, + INS_OPTS_SCALABLE_S); /* FNEG ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_not, EA_SCALABLE, REG_V25, REG_P6, REG_V6, + INS_OPTS_SCALABLE_B); /* NOT ., /M, . */ + + // IF_SVE_AQ_3A + theEmitter->emitIns_R_R_R(INS_sve_abs, EA_SCALABLE, REG_V24, REG_P7, REG_V7, + INS_OPTS_SCALABLE_B); /* ABS ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_neg, EA_SCALABLE, REG_V23, REG_P0, REG_V8, + INS_OPTS_SCALABLE_S); /* NEG ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_sxtb, EA_SCALABLE, REG_V22, REG_P1, REG_V9, + INS_OPTS_SCALABLE_H); /* SXTB ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_sxtb, EA_SCALABLE, REG_V22, REG_P1, REG_V9, + INS_OPTS_SCALABLE_S); /* SXTB ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_sxtb, EA_SCALABLE, REG_V22, REG_P1, REG_V9, + INS_OPTS_SCALABLE_D); /* SXTB ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_sxth, EA_SCALABLE, REG_V21, REG_P2, REG_V10, + INS_OPTS_SCALABLE_S); /* SXTH ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_sxth, EA_SCALABLE, REG_V21, REG_P2, REG_V10, + INS_OPTS_SCALABLE_D); /* SXTH ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_sxtw, EA_SCALABLE, REG_V20, REG_P3, REG_V11, + INS_OPTS_SCALABLE_D); /* SXTW .D, /M, .D */ + theEmitter->emitIns_R_R_R(INS_sve_uxtb, EA_SCALABLE, REG_V19, REG_P4, REG_V12, + INS_OPTS_SCALABLE_H); /* UXTB ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_uxtb, EA_SCALABLE, REG_V19, REG_P4, REG_V12, + INS_OPTS_SCALABLE_S); /* UXTB ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_uxtb, EA_SCALABLE, REG_V19, REG_P4, REG_V12, + INS_OPTS_SCALABLE_D); /* UXTB ., /M, . 
*/ + theEmitter->emitIns_R_R_R(INS_sve_uxth, EA_SCALABLE, REG_V18, REG_P5, REG_V13, + INS_OPTS_SCALABLE_S); /* UXTH ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_uxth, EA_SCALABLE, REG_V18, REG_P5, REG_V13, + INS_OPTS_SCALABLE_D); /* UXTH ., /M, . */ + theEmitter->emitIns_R_R_R(INS_sve_uxtw, EA_SCALABLE, REG_V17, REG_P6, REG_V14, + INS_OPTS_SCALABLE_D); /* UXTW .D, /M, .D */ + #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE #ifdef ALL_ARM64_EMITTER_UNIT_TESTS diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ee65ab8acd6e1..c2ebe6030a6da 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -983,23 +983,25 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; - // Scalable to Simd. + // Scalable to SIMD scalar. case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) elemsize = id->idOpSize(); - assert(insOptsScalableToSimd(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(insOptsScalableToSimdScalar(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isValidVectorElemsize(elemsize)); break; - // Scalable to FP Simd. + // Scalable to FP SIMD scalar. case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) elemsize = id->idOpSize(); - assert(insOptsScalableToSimdFloat(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm + assert(insOptsScalableToSimdFPScalar(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm assert(isValidVectorElemsizeSveFloat(elemsize)); break; @@ -1024,6 +1026,77 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + // Scalable to Simd Vector. + case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) + elemsize = id->idOpSize(); + assert(insOptsScalableToSimdVector(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(elemsize)); + break; + + // Scalable, widening to scalar SIMD. + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableWideningToSimdScalar(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsizeWidening(elemsize)); + break; + + // Scalable, possibly FP. + case IF_SVE_AP_3A: // ........xx...... 
...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + switch (id->idIns()) + { + case INS_sve_fabs: + case INS_sve_fneg: + assert(insOptsScalableFloat(id->idInsOpt())); // xx + break; + + default: + assert(insOptsScalableSimple(id->idInsOpt())); // xx + break; + } + elemsize = id->idOpSize(); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated) + switch (id->idIns()) + { + case INS_sve_abs: + case INS_sve_neg: + assert(insOptsScalableSimple(id->idInsOpt())); + break; + + case INS_sve_sxtb: + case INS_sve_uxtb: + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + break; + + case INS_sve_sxth: + case INS_sve_uxth: + assert(insOptsScalableWords(id->idInsOpt())); + break; + + default: + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + break; + } + elemsize = id->idOpSize(); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -8184,6 +8257,68 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_AE_3A; break; + case INS_sve_andv: + case INS_sve_eorv: + case INS_sve_orv: + assert(isFloatReg(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdScalar(opt)); + fmt = IF_SVE_AF_3A; + break; + + case INS_sve_andqv: + case INS_sve_eorqv: + case INS_sve_orqv: + unreached(); // TODO-SVE: Not yet supported. 
+ assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdVector(opt)); + fmt = IF_SVE_AG_3A; + break; + + case INS_sve_saddv: + case INS_sve_uaddv: + assert(isFloatReg(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdScalar(opt)); + fmt = IF_SVE_AI_3A; + break; + + case INS_sve_addqv: + unreached(); // TODO-SVE: Not yet supported. + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdVector(opt)); + fmt = IF_SVE_AJ_3A; + break; + + case INS_sve_smaxv: + case INS_sve_sminv: + case INS_sve_umaxv: + case INS_sve_uminv: + assert(isFloatReg(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdScalar(opt)); + fmt = IF_SVE_AK_3A; + break; + + case INS_sve_smaxqv: + case INS_sve_sminqv: + case INS_sve_umaxqv: + case INS_sve_uminqv: + unreached(); // TODO-SVE: Not yet supported. 
+ assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdVector(opt)); + fmt = IF_SVE_AL_3A; + break; + case INS_sve_asrr: case INS_sve_lslr: case INS_sve_lsrr: @@ -8211,6 +8346,63 @@ void emitter::emitIns_R_R_R( } break; + case INS_sve_clz: + case INS_sve_cls: + case INS_sve_cnt: + case INS_sve_not: + case INS_sve_cnot: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AP_3A; + break; + + case INS_sve_fabs: + case INS_sve_fneg: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + fmt = IF_SVE_AP_3A; + break; + + case INS_sve_abs: + case INS_sve_neg: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AQ_3A; + break; + + case INS_sve_sxtb: + case INS_sve_uxtb: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableAtLeastHalf(opt)); + fmt = IF_SVE_AQ_3A; + break; + + case INS_sve_sxth: + case INS_sve_uxth: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableWords(opt)); + fmt = IF_SVE_AQ_3A; + break; + + case INS_sve_sxtw: + case INS_sve_uxtw: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_AQ_3A; + break; + case INS_sve_clasta: case INS_sve_clastb: assert(isLowPredicateRegister(reg2)); @@ -8220,7 +8412,7 @@ void emitter::emitIns_R_R_R( assert(isVectorRegister(reg1)); fmt = IF_SVE_CM_3A; } - else if (insOptsScalableToSimd(opt)) + else if (insOptsScalableToSimdScalar(opt)) { assert(isFloatReg(reg1)); 
assert(isValidVectorElemsize(size)); @@ -8312,15 +8504,13 @@ void emitter::emitIns_R_R_R( assert(isFloatReg(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); - assert(insOptsScalableToSimdFloat(opt)); + assert(insOptsScalableToSimdFPScalar(opt)); assert(isValidVectorElemsizeSveFloat(size)); fmt = IF_SVE_HJ_3A; break; case INS_sve_fabd: case INS_sve_fadd: - case INS_sve_famax: - case INS_sve_famin: case INS_sve_fdiv: case INS_sve_fdivr: case INS_sve_fmax: @@ -8339,6 +8529,16 @@ void emitter::emitIns_R_R_R( fmt = IF_SVE_HL_3A; break; + case INS_sve_famax: + case INS_sve_famin: + unreached(); // TODO-SVE: Not yet supported. + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + fmt = IF_SVE_HL_3A; + break; + default: unreached(); break; @@ -11961,24 +12161,28 @@ void emitter::emitIns_Call(EmitCallType callType, { case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: - case INS_OPTS_SCALABLE_B_TO_SIMD: + case INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR: + case INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR: case INS_OPTS_SCALABLE_B_TO_SCALAR: return 0x00000000; case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: - case INS_OPTS_SCALABLE_H_TO_SIMD: + case INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR: + case INS_OPTS_SCALABLE_H_TO_SIMD_VECTOR: case INS_OPTS_SCALABLE_H_TO_SCALAR: return 0x00400000; // set the bit at location 22 case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: - case INS_OPTS_SCALABLE_S_TO_SIMD: + case INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR: + case INS_OPTS_SCALABLE_S_TO_SIMD_VECTOR: case INS_OPTS_SCALABLE_S_TO_SCALAR: return 0x00800000; // set the bit at location 23 case INS_OPTS_SCALABLE_D: - case INS_OPTS_SCALABLE_D_TO_SIMD: + case INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR: + case INS_OPTS_SCALABLE_D_TO_SIMD_VECTOR: case INS_OPTS_SCALABLE_D_TO_SCALAR: return 0x00C00000; // set the bit at location 23 and 22 @@ -13941,8 +14145,16 @@ size_t 
emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated) case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_EP_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) @@ -13956,7 +14168,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg - code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm or nnnnn code |= insEncodeSveElemsize(id->idInsOpt()); // xx dst += emitOutput_Instr(dst, code); break; @@ -14567,11 +14779,12 @@ void emitter::emitDispArrangement(insOpts opt) str = "8b"; break; case INS_OPTS_16B: + case INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR: str = "16b"; break; case INS_OPTS_SCALABLE_B: case INS_OPTS_SCALABLE_WIDE_B: - case INS_OPTS_SCALABLE_B_TO_SIMD: + case INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR: case INS_OPTS_SCALABLE_B_TO_SCALAR: str = "b"; break; @@ -14579,11 +14792,12 @@ void emitter::emitDispArrangement(insOpts opt) str = "4h"; break; case INS_OPTS_8H: + case INS_OPTS_SCALABLE_H_TO_SIMD_VECTOR: str = "8h"; break; case INS_OPTS_SCALABLE_H: case INS_OPTS_SCALABLE_WIDE_H: - case INS_OPTS_SCALABLE_H_TO_SIMD: + case INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR: case INS_OPTS_SCALABLE_H_TO_SCALAR: str = "h"; break; @@ -14591,11 +14805,12 @@ void emitter::emitDispArrangement(insOpts opt) str = "2s"; break; case INS_OPTS_4S: + case INS_OPTS_SCALABLE_S_TO_SIMD_VECTOR: str = "4s"; break; case INS_OPTS_SCALABLE_S: case INS_OPTS_SCALABLE_WIDE_S: - case INS_OPTS_SCALABLE_S_TO_SIMD: + case INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR: case INS_OPTS_SCALABLE_S_TO_SCALAR: str = "s"; break; @@ -14603,10 +14818,11 @@ void emitter::emitDispArrangement(insOpts opt) str = "1d"; break; case INS_OPTS_2D: + case INS_OPTS_SCALABLE_D_TO_SIMD_VECTOR: str = "2d"; break; case INS_OPTS_SCALABLE_D: - case INS_OPTS_SCALABLE_D_TO_SIMD: + case INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR: case INS_OPTS_SCALABLE_D_TO_SCALAR: str = "d"; break; @@ -16172,7 +16388,7 @@ void emitter::emitDispInsHelp( } 
break; - // Scalable. + // ., /M, ., . case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) @@ -16192,7 +16408,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; - // Scalable. Reg3 has elements of size 8 bytes. + // ., /M, ., .D case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispLowPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg @@ -16200,7 +16416,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm break; - // Scalable. No predicate type. + // ., , ., . case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispLowPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg @@ -16208,7 +16424,7 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; - // Scalable to general register or SIMD. No predicate type. + // , , , . case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) @@ -16218,6 +16434,38 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm break; + // , , . + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + case IF_SVE_AK_3A: // ........xx...... 
...gggnnnnnddddd -- SVE integer min/max reduction (predicated) + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., , . + case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + //
<Dd>, <Pg>, <Zn>.<T> + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // nnnnn + break; + + // <Zd>.<T>, <Pg>/M, <Zn>.<T> + case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // nnnnn + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -18426,6 +18674,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; + // Reduction, logical + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_6C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Reduction, arithmetic, D form (worse for B, S and H) + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + // Reduction, arithmetic, D form (worse for B, S and H) + case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + // Arithmetic, shift case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) @@ -18433,6 +18695,44 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; + // Count/reverse bits + // Arithmetic, basic + // Floating point absolute value/difference + // Floating point arithmetic + // Logical + case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case IF_SVE_AQ_3A: + switch (ins) + { + // Arithmetic, basic + case INS_sve_abs: + case INS_sve_neg: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + // Extend, sign or zero + case INS_sve_sxtb: + case INS_sve_sxth: + case INS_sve_sxtw: + case INS_sve_uxtb: + case INS_sve_uxth: + case INS_sve_uxtw: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + // Conditional extract operations, SIMD&FP scalar and vector forms case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector case IF_SVE_CN_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar @@ -18512,6 +18812,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_2X; break; + case INS_sve_famax: + case INS_sve_famin: + result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder + result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -18519,6 +18825,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + // Not available in Arm Neoverse N2 Software Optimization Guide. + case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) + result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder + result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 94ac7336813a2..7dc101bbeeb20 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -718,6 +718,11 @@ inline static bool isValidVectorElemsizeSveFloat(emitAttr size) return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE); } +inline static bool isValidVectorElemsizeWidening(emitAttr size) +{ + return (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE); +} + inline static bool isScalableVectorSize(emitAttr size) { return (size == EA_SCALABLE); @@ -857,8 +862,8 @@ inline static bool insOptsConvertIntToFloat(insOpts opt) inline static bool insOptsScalable(insOpts opt) { // Opt is any of the scalable types. 
- return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableToSimd(opt)) || - (insOptsScalableToScalar(opt))); + return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableToSimdScalar(opt)) || + (insOptsScalableToScalar(opt)) || insOptsScalableToSimdVector(opt)); } inline static bool insOptsScalableSimple(insOpts opt) @@ -874,6 +879,12 @@ inline static bool insOptsScalableWords(insOpts opt) return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableAtLeastHalf(insOpts opt) +{ + // `opt` is any of the standard half and above scalable types. + return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); +} + inline static bool insOptsScalableFloat(insOpts opt) { // `opt` is any of the standard scalable types that are valid for FP. @@ -887,18 +898,32 @@ inline static bool insOptsScalableWide(insOpts opt) (opt == INS_OPTS_SCALABLE_WIDE_S)); } -inline static bool insOptsScalableToSimd(insOpts opt) +inline static bool insOptsScalableToSimdVector(insOpts opt) +{ + // `opt` is any of the scalable types that are valid for conversion to an Advsimd SIMD Vector. + return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD_VECTOR) || + (opt == INS_OPTS_SCALABLE_S_TO_SIMD_VECTOR) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD_VECTOR)); +} + +inline static bool insOptsScalableToSimdScalar(insOpts opt) { // `opt` is any of the scalable types that are valid for conversion to a scalar in a SIMD register. 
- return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD) || - (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); + return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR) || + (opt == INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR)); } -inline static bool insOptsScalableToSimdFloat(insOpts opt) +inline static bool insOptsScalableToSimdFPScalar(insOpts opt) { // `opt` is any of the scalable types that are valid for conversion to an FP scalar in a SIMD register. - return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || - (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); + return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR) || + (opt == INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR)); +} + +inline static bool insOptsScalableWideningToSimdScalar(insOpts opt) +{ + // `opt` is any of the scalable types that are valid for widening then conversion to a scalar in a SIMD register. 
+ return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR) || + (opt == INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR)); } inline static bool insOptsScalableToScalar(insOpts opt) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index d5efd33cb759a..62f0c25d13cbb 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -279,10 +279,15 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_WIDE_H, INS_OPTS_SCALABLE_WIDE_S, - INS_OPTS_SCALABLE_B_TO_SIMD, - INS_OPTS_SCALABLE_H_TO_SIMD, - INS_OPTS_SCALABLE_S_TO_SIMD, - INS_OPTS_SCALABLE_D_TO_SIMD, + INS_OPTS_SCALABLE_B_TO_SIMD_VECTOR, + INS_OPTS_SCALABLE_H_TO_SIMD_VECTOR, + INS_OPTS_SCALABLE_S_TO_SIMD_VECTOR, + INS_OPTS_SCALABLE_D_TO_SIMD_VECTOR, + + INS_OPTS_SCALABLE_B_TO_SIMD_SCALAR, + INS_OPTS_SCALABLE_H_TO_SIMD_SCALAR, + INS_OPTS_SCALABLE_S_TO_SIMD_SCALAR, + INS_OPTS_SCALABLE_D_TO_SIMD_SCALAR, INS_OPTS_SCALABLE_B_TO_SCALAR, INS_OPTS_SCALABLE_H_TO_SCALAR,