diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 16c0ee588fabc6..3502673faacac1 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -993,6 +993,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ZERO_EXTEND_INT, SIGN_EXTEND_INT, #endif + LOAD_ZERO_EXTEND_SMALL_INT, + LOAD_SIGN_EXTEND_SMALL_INT, +#ifdef TARGET_64BIT + LOAD_ZERO_EXTEND_INT, + LOAD_SIGN_EXTEND_INT, +#endif + LOAD_SOURCE }; private: diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 536797eaba9593..0702ee2f737fba 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3898,25 +3898,24 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d // cast - The GT_CAST node // // Assumptions: -// The cast node is not a contained node and must have an assigned register. // Neither the source nor target type can be a floating point type. // -// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register. -// void CodeGen::genIntToIntCast(GenTreeCast* cast) { - genConsumeRegs(cast->gtGetOp1()); + genConsumeRegs(cast->CastOp()); - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + GenTree* const src = cast->CastOp(); + const regNumber srcReg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA; const regNumber dstReg = cast->GetRegNum(); + emitter* const emit = GetEmitter(); - assert(genIsValidIntReg(srcReg)); assert(genIsValidIntReg(dstReg)); GenIntCastDesc desc(cast); if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) { + assert(genIsValidIntReg(srcReg)); genIntCastOverflowCheck(cast, desc, srcReg); } @@ -3944,15 +3943,70 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) ins = INS_sxtw; insSize = 8; break; -#endif - default: - assert(desc.ExtendKind() == GenIntCastDesc::COPY); +#endif // TARGET_64BIT + case GenIntCastDesc::COPY: ins = INS_mov; insSize = desc.ExtendSrcSize(); break; + case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT: + ins = (desc.ExtendSrcSize() == 1) ? INS_ldrb : INS_ldrh; + insSize = TARGET_POINTER_SIZE; + break; + case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT: + ins = (desc.ExtendSrcSize() == 1) ? INS_ldrsb : INS_ldrsh; + insSize = TARGET_POINTER_SIZE; + break; +#ifdef TARGET_64BIT + case GenIntCastDesc::LOAD_ZERO_EXTEND_INT: + ins = INS_ldr; + insSize = 4; + break; + case GenIntCastDesc::LOAD_SIGN_EXTEND_INT: + ins = INS_ldrsw; + insSize = 8; + break; +#endif // TARGET_64BIT + case GenIntCastDesc::LOAD_SOURCE: + ins = ins_Load(src->TypeGet()); + insSize = genTypeSize(genActualType(src)); + break; + + default: + unreached(); } - GetEmitter()->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false); + if (srcReg != REG_NA) + { + emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, /* canSkip */ false); + } + else + { + // The "used from memory" case. On ArmArch casts are the only nodes which can have + // contained memory operands, so we have to handle all possible sources "manually". 
+            assert(src->isUsedFromMemory());
+
+            if (src->isUsedFromSpillTemp())
+            {
+                assert(src->IsRegOptional());
+
+                TempDsc* tmpDsc = getSpillTempDsc(src);
+                unsigned tmpNum = tmpDsc->tdTempNum();
+                regSet.tmpRlsTemp(tmpDsc);
+
+                emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, tmpNum, 0);
+            }
+            else if (src->OperIsLocal())
+            {
+                emit->emitIns_R_S(ins, EA_ATTR(insSize), dstReg, src->AsLclVarCommon()->GetLclNum(),
+                                  src->AsLclVarCommon()->GetLclOffs());
+            }
+            else
+            {
+                assert(src->OperIs(GT_IND) && !src->AsIndir()->IsVolatile() && !src->AsIndir()->IsUnaligned());
+                emit->emitIns_R_R_I(ins, EA_ATTR(insSize), dstReg, src->AsIndir()->Base()->GetRegNum(),
+                                    static_cast<int>(src->AsIndir()->Offset()));
+            }
+        }
     }
 
     genProduceReg(cast);
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index e3ac21ba504a27..d67c23dc85c4b7 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -2448,7 +2448,8 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
 CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
 {
-    const var_types srcType = genActualType(cast->gtGetOp1()->TypeGet());
+    GenTree* const src = cast->CastOp();
+    const var_types srcType = genActualType(src);
     const bool srcUnsigned = cast->IsUnsigned();
     const unsigned srcSize = genTypeSize(srcType);
     const var_types castType = cast->gtCastType;
@@ -2457,7 +2458,9 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
     const var_types dstType = genActualType(cast->TypeGet());
     const unsigned dstSize = genTypeSize(dstType);
     const bool overflow = cast->gtOverflow();
+    const bool castIsLoad = !src->isUsedFromReg();
+
+    assert(castIsLoad == src->isUsedFromMemory());
     assert((srcSize == 4) || (srcSize == genTypeSize(TYP_I_IMPL)));
     assert((dstSize == 4) || (dstSize == genTypeSize(TYP_I_IMPL)));
@@ -2473,7 +2476,7 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
         // values of the castType without risk of integer overflow.
         const int castNumBits = (castSize * 8) - (castUnsigned ? 0 : 1);
         m_checkSmallIntMax = (1 << castNumBits) - 1;
-        m_checkSmallIntMin = (castUnsigned | srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1);
+        m_checkSmallIntMin = (castUnsigned || srcUnsigned) ? 0 : (-m_checkSmallIntMax - 1);
 
         m_extendKind = COPY;
         m_extendSrcSize = dstSize;
@@ -2568,6 +2571,48 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
         m_extendKind = COPY;
         m_extendSrcSize = srcSize;
     }
+
+    if (castIsLoad)
+    {
+        const var_types srcLoadType = src->TypeGet();
+
+        switch (m_extendKind)
+        {
+            case ZERO_EXTEND_SMALL_INT: // small type/int/long -> ubyte/ushort
+                assert(varTypeIsUnsigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType)));
+                m_extendKind = LOAD_ZERO_EXTEND_SMALL_INT;
+                m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType));
+                break;
+
+            case SIGN_EXTEND_SMALL_INT: // small type/int/long -> byte/short
+                assert(varTypeIsSigned(srcLoadType) || (genTypeSize(srcLoadType) >= genTypeSize(castType)));
+                m_extendKind = LOAD_SIGN_EXTEND_SMALL_INT;
+                m_extendSrcSize = min(genTypeSize(srcLoadType), genTypeSize(castType));
+                break;
+
+#ifdef TARGET_64BIT
+            case ZERO_EXTEND_INT: // ubyte/ushort/int -> long.
+                assert(varTypeIsUnsigned(srcLoadType) || (srcLoadType == TYP_INT));
+                m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_ZERO_EXTEND_SMALL_INT : LOAD_ZERO_EXTEND_INT;
+                m_extendSrcSize = genTypeSize(srcLoadType);
+                break;
+
+            case SIGN_EXTEND_INT: // byte/short/int -> long.
+ assert(varTypeIsSigned(srcLoadType) || (srcLoadType == TYP_INT)); + m_extendKind = varTypeIsSmall(srcLoadType) ? LOAD_SIGN_EXTEND_SMALL_INT : LOAD_SIGN_EXTEND_INT; + m_extendSrcSize = genTypeSize(srcLoadType); + break; +#endif // TARGET_64BIT + + case COPY: // long -> long, small type/int/long -> int. + m_extendKind = LOAD_SOURCE; + m_extendSrcSize = 0; + break; + + default: + unreached(); + } + } } #if !defined(TARGET_64BIT) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 271b24c22bebb8..d5bbe76dc1e528 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6752,27 +6752,25 @@ void CodeGen::genIntCastOverflowCheck(GenTreeCast* cast, const GenIntCastDesc& d // cast - The GT_CAST node // // Assumptions: -// The cast node is not a contained node and must have an assigned register. // Neither the source nor target type can be a floating point type. // On x86 casts to (U)BYTE require that the source be in a byte register. // -// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. -// void CodeGen::genIntToIntCast(GenTreeCast* cast) { - genConsumeRegs(cast->gtGetOp1()); + genConsumeRegs(cast->CastOp()); - const regNumber srcReg = cast->gtGetOp1()->GetRegNum(); + GenTree* const src = cast->CastOp(); + const regNumber srcReg = src->isUsedFromReg() ? src->GetRegNum() : REG_NA; const regNumber dstReg = cast->GetRegNum(); emitter* emit = GetEmitter(); - assert(genIsValidIntReg(srcReg)); assert(genIsValidIntReg(dstReg)); GenIntCastDesc desc(cast); if (desc.CheckKind() != GenIntCastDesc::CHECK_NONE) { + assert(genIsValidIntReg(srcReg)); genIntCastOverflowCheck(cast, desc, srcReg); } @@ -6783,33 +6781,51 @@ void CodeGen::genIntToIntCast(GenTreeCast* cast) switch (desc.ExtendKind()) { case GenIntCastDesc::ZERO_EXTEND_SMALL_INT: + case GenIntCastDesc::LOAD_ZERO_EXTEND_SMALL_INT: ins = INS_movzx; insSize = desc.ExtendSrcSize(); break; case GenIntCastDesc::SIGN_EXTEND_SMALL_INT: + case GenIntCastDesc::LOAD_SIGN_EXTEND_SMALL_INT: ins = INS_movsx; insSize = desc.ExtendSrcSize(); break; #ifdef TARGET_64BIT case GenIntCastDesc::ZERO_EXTEND_INT: + case GenIntCastDesc::LOAD_ZERO_EXTEND_INT: ins = INS_mov; insSize = 4; canSkip = compiler->opts.OptimizationEnabled() && emit->AreUpper32BitsZero(srcReg); break; case GenIntCastDesc::SIGN_EXTEND_INT: + case GenIntCastDesc::LOAD_SIGN_EXTEND_INT: ins = INS_movsxd; insSize = 4; break; #endif - default: - assert(desc.ExtendKind() == GenIntCastDesc::COPY); + case GenIntCastDesc::COPY: ins = INS_mov; insSize = desc.ExtendSrcSize(); canSkip = true; break; + case GenIntCastDesc::LOAD_SOURCE: + ins = ins_Load(src->TypeGet()); + insSize = genTypeSize(src); + break; + + default: + unreached(); } - emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip); + if (srcReg != REG_NA) + { + emit->emitIns_Mov(ins, EA_ATTR(insSize), dstReg, srcReg, canSkip); + } + else + { + assert(src->isUsedFromMemory()); + inst_RV_TT(ins, EA_ATTR(insSize), dstReg, src); + } genProduceReg(cast); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 81c0c181bfa3ed..25aaf4541c7aaf 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3858,6 +3858,22 @@ struct GenTreeCast : public GenTreeOp { } #endif + + bool IsZeroExtending() + { + assert(varTypeIsIntegral(CastOp()) && varTypeIsIntegral(CastToType())); + + if (varTypeIsSmall(CastToType())) + { + return varTypeIsUnsigned(CastToType()); + } + if (TypeIs(TYP_LONG) && genActualTypeIsInt(CastOp())) + { + 
return IsUnsigned(); + } + + return false; + } }; // GT_BOX nodes are place markers for boxed values. The "real" tree diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index bc53afad80c7a3..de8a743cbfff72 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -288,11 +288,7 @@ CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate ran // AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate // fallback to main JIT on hitting a NYI. -#if defined(TARGET_ARM64) || defined(TARGET_X86) -CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the AltJit behavior of NYI stuff -#else // !defined(TARGET_ARM64) && !defined(TARGET_X86) CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff -#endif // defined(TARGET_ARM64) || defined(TARGET_X86) CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru: // partial enregistration of vars exposed on EH boundaries diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index e7feb1f819a14e..78c9d85b59ed04 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2828,6 +2828,11 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) op2->ClearContained(); } } + else + { + castOp->ClearContained(); + } + cmp->AsOp()->gtOp1 = castOp; BlockRange().Remove(cast); @@ -5352,6 +5357,7 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par #ifdef TARGET_ARM64 if ((index != nullptr) && index->OperIs(GT_CAST) && (scale == 1) && (offset == 0) && varTypeIsByte(targetType)) { + index->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. MakeSrcContained(addrMode, index); } @@ -6111,6 +6117,7 @@ void Lowering::LowerShift(GenTreeOp* shift) // The parent was replaced, clear contain and regOpt flag. shift->gtOp2->ClearContained(); } + ContainCheckShiftRotate(shift); #ifdef TARGET_ARM64 @@ -6129,13 +6136,13 @@ void Lowering::LowerShift(GenTreeOp* shift) unsigned dstBits = genTypeSize(cast) * BITS_PER_BYTE; unsigned srcBits = varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE : genTypeSize(cast->CastOp()) * BITS_PER_BYTE; - assert(!cast->CastOp()->isContained()); // It has to be an upcast and CNS must be in [1..srcBits) range if ((srcBits < dstBits) && (cns->IconValue() > 0) && (cns->IconValue() < srcBits)) { JITDUMP("Recognized ubfix/sbfix pattern in LSH(CAST, CNS). Changing op to GT_BFIZ"); shift->ChangeOper(GT_BFIZ); + cast->CastOp()->ClearContained(); // Uncontain any memory operands. MakeSrcContained(shift, cast); } } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index b0d880c030227a..f37aa6d2be5e03 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -388,10 +388,11 @@ class Lowering final : public Phase return op; } - GenTreeCast* cast = op->AsCast(); + GenTreeCast* cast = op->AsCast(); + GenTree* castOp = cast->CastOp(); // FP <-> INT casts should be kept - if (varTypeIsFloating(cast->CastFromType()) ^ varTypeIsFloating(expectedType)) + if (varTypeIsFloating(castOp) ^ varTypeIsFloating(expectedType)) { return op; } @@ -402,17 +403,23 @@ class Lowering final : public Phase return op; } + // Keep casts with operands usable from memory. 
+ if (castOp->isContained() || castOp->IsRegOptional()) + { + return op; + } + if (genTypeSize(cast->CastToType()) >= genTypeSize(expectedType)) { #ifndef TARGET_64BIT // Don't expose TYP_LONG on 32bit - if (varTypeIsLong(cast->CastFromType())) + if (castOp->TypeIs(TYP_LONG)) { return op; } #endif BlockRange().Remove(op); - return cast->CastOp(); + return castOp; } return op; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 0ff88151503201..9cab1d427ad1f2 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -307,11 +307,13 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) mul->SetUnsigned(); } + op1->CastOp()->ClearContained(); // Uncontain any memory operands. mul->gtOp1 = op1->CastOp(); BlockRange().Remove(op1); if (op2->OperIs(GT_CAST)) { + op2->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. mul->gtOp2 = op2->AsCast()->CastOp(); BlockRange().Remove(op2); } @@ -1881,6 +1883,7 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) if (cast->gtGetOp1()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && !cast->gtOverflow()) { node->ChangeOper(GT_ADDEX); + cast->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. MakeSrcContained(node, cast); } } @@ -2024,11 +2027,63 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const // void Lowering::ContainCheckCast(GenTreeCast* node) { -#ifdef TARGET_ARM GenTree* castOp = node->CastOp(); var_types castToType = node->CastToType(); - var_types srcType = castOp->TypeGet(); + if (comp->opts.OptimizationEnabled() && !node->gtOverflow() && varTypeIsIntegral(castOp) && + varTypeIsIntegral(castToType)) + { + // Most integral casts can be re-expressed as loads, except those that would be changing the sign. + if (!varTypeIsSmall(castOp) || (varTypeIsUnsigned(castOp) == node->IsZeroExtending())) + { + bool srcIsContainable = false; + + // Make sure to only contain indirections codegen can handle. + if (castOp->OperIs(GT_IND)) + { + GenTreeIndir* indir = castOp->AsIndir(); + + if (!indir->IsVolatile() && !indir->IsUnaligned()) + { + GenTree* addr = indir->Addr(); + + if (!addr->isContained()) + { + srcIsContainable = true; + } + else if (addr->OperIs(GT_LEA) && !addr->AsAddrMode()->HasIndex()) + { + var_types loadType = varTypeIsSmall(castToType) ? castToType : castOp->TypeGet(); + + if (emitter::emitIns_valid_imm_for_ldst_offset(addr->AsAddrMode()->Offset(), + emitTypeSize(loadType))) + { + srcIsContainable = true; + } + } + } + } + else + { + assert(castOp->OperIsLocalRead() || !IsContainableMemoryOp(castOp)); + srcIsContainable = true; + } + + if (srcIsContainable && IsSafeToContainMem(node, castOp)) + { + if (IsContainableMemoryOp(castOp)) + { + MakeSrcContained(node, castOp); + } + else + { + castOp->SetRegOptional(); + } + } + } + } + +#ifdef TARGET_ARM if (varTypeIsLong(castOp)) { assert(castOp->OperGet() == GT_LONG); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 5f2e384daa7ce7..a7b1747d54bc09 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5135,28 +5135,52 @@ void Lowering::ContainCheckCast(GenTreeCast* node) srcType = varTypeToUnsigned(srcType); } - if (!node->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(srcType))) + if (!node->gtOverflow()) { + // Some casts will be able to use the source from memory. 
+ bool srcIsContainable = false; + + if (varTypeIsFloating(castToType) || varTypeIsFloating(srcType)) + { #ifdef DEBUG - // If converting to float/double, the operand must be 4 or 8 byte in size. - if (varTypeIsFloating(castToType)) + // If converting to float/double, the operand must be 4 or 8 byte in size. + if (varTypeIsFloating(castToType)) + { + unsigned opSize = genTypeSize(srcType); + assert(opSize == 4 || opSize == 8); + } +#endif // DEBUG + + // U8 -> R8 conversion requires that the operand be in a register. + if (srcType != TYP_ULONG) + { + if (castOp->IsCnsNonZeroFltOrDbl()) + { + MakeSrcContained(node, castOp); + } + else + { + srcIsContainable = true; + } + } + } + else if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(castOp) && varTypeIsIntegral(castToType)) { - unsigned opSize = genTypeSize(srcType); - assert(opSize == 4 || opSize == 8); + // Most integral casts can be re-expressed as loads, except those that would be changing the sign. + if (!varTypeIsSmall(castOp) || (varTypeIsUnsigned(castOp) == node->IsZeroExtending())) + { + srcIsContainable = true; + } } -#endif // DEBUG - // U8 -> R8 conversion requires that the operand be in a register. - if (srcType != TYP_ULONG) + if (srcIsContainable && IsSafeToContainMem(node, castOp)) { - if ((IsContainableMemoryOp(castOp) && IsSafeToContainMem(node, castOp)) || castOp->IsCnsNonZeroFltOrDbl()) + if (IsContainableMemoryOp(castOp)) { MakeSrcContained(node, castOp); } else { - // Mark castOp as reg optional to indicate codegen - // can still generate code if it is on stack. castOp->SetRegOptional(); } }
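
Note (illustrative, not part of the change): the new LOAD_* extend kinds exist so that a cast whose operand is contained can fold the load and the extension into a single instruction. The standalone C++ sketch below mirrors the ARM64 mnemonic selection in CodeGen::genIntToIntCast above, assuming the mapping in that switch; LoadKind and PickArm64LoadIns are hypothetical names used only here and are not JIT types.

// Standalone sketch, compilable on its own. It only illustrates the mapping
// performed by the switch in genIntToIntCast; it uses none of the JIT types.
#include <cassert>
#include <cstdio>

enum class LoadKind
{
    ZeroExtendSmallInt, // corresponds to LOAD_ZERO_EXTEND_SMALL_INT (1 or 2 byte source)
    SignExtendSmallInt, // corresponds to LOAD_SIGN_EXTEND_SMALL_INT (1 or 2 byte source)
    ZeroExtendInt,      // corresponds to LOAD_ZERO_EXTEND_INT (4 byte source, 64 bit only)
    SignExtendInt,      // corresponds to LOAD_SIGN_EXTEND_INT (4 byte source, 64 bit only)
    Source              // corresponds to LOAD_SOURCE (plain load of the source type)
};

// Picks the ARM64 mnemonic a cast with a contained memory operand would use.
static const char* PickArm64LoadIns(LoadKind kind, unsigned srcSizeInBytes)
{
    switch (kind)
    {
        case LoadKind::ZeroExtendSmallInt:
            return (srcSizeInBytes == 1) ? "ldrb" : "ldrh";
        case LoadKind::SignExtendSmallInt:
            return (srcSizeInBytes == 1) ? "ldrsb" : "ldrsh";
        case LoadKind::ZeroExtendInt:
            return "ldr"; // a 32-bit load already zeroes the upper 32 bits
        case LoadKind::SignExtendInt:
            return "ldrsw";
        case LoadKind::Source:
            return "ldr"; // width chosen from the source type (ins_Load)
    }

    assert(!"unknown LoadKind");
    return nullptr;
}

int main()
{
    // An int load cast to long becomes a single "ldrsw" instead of "ldr" + "sxtw".
    std::printf("%s\n", PickArm64LoadIns(LoadKind::SignExtendInt, 4));
    return 0;
}

On xarch the same folding falls out of movzx/movsx/movsxd accepting memory operands directly, which is why the x64 side of the diff only needs inst_RV_TT for the contained case.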