Skip to content

Commit

Permalink
ARM64: Fix for Multiplication with Overflow Check
Browse files Browse the repository at this point in the history
For 4 byte integer multiplication, JIT emits a bad-code which is valid
only for 8 byte (i8) multiplication.
For the fix, I use ```smull```(signed)/```umull```(unsigned) instructions
that contain 8 byte results from 4 byte by 4 byte multiplication.
So only one multiplication is needed instead of two for this case.
By simply shifting the results, we could get the upper results that is
used to detect overflow.
Similar transform is made for the unsigned case.
Lower is also changed to reserve a register for overflow check.

Before
smulh   w10, w8, w9  --> Incorrect use: smulh is for obtaining the upper
bits [127:64] of x8 * x9
mul     w8, w8, w9
cmp     x10, x8, ASR dotnet#63

After
smull   x8, x8, x9   --> x8 = w8 * w9
lsr     x10, x8, dotnet#32 --> shift the upper bit of x8 to get sign bit
cmp     w10, w8, ASR #31 --> check sign bit
  • Loading branch information
kyulee1 committed Mar 2, 2016
1 parent 9c84847 commit 445a810
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 34 deletions.
75 changes: 48 additions & 27 deletions src/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10824,7 +10824,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
}
bool isMulOverflow = false;
bool isUnsignedMul = false;
instruction ins2 = INS_invalid;
regNumber extraReg = REG_NA;
if (dst->gtOverflowEx())
{
Expand All @@ -10840,7 +10839,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
{
isMulOverflow = true;
isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
ins2 = isUnsignedMul ? INS_umulh : INS_smulh;
assert(intConst == nullptr); // overflow format doesn't support an int constant operand
}
else
Expand All @@ -10856,43 +10854,66 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
{
if (isMulOverflow)
{
// Make sure that we have an internal register
assert(genCountBits(dst->gtRsvdRegs) == 2);

// There will be two bits set in tmpRegsMask.
// Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
assert(tmpRegsMask != RBM_NONE);
regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask

if (isUnsignedMul)
{
assert(genCountBits(dst->gtRsvdRegs) == 1);
extraReg = genRegNumFromMask(dst->gtRsvdRegs);
if (attr == EA_4BYTE)
{
// Compute 8 byte results from 4 byte by 4 byte multiplication.
emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);

// Compute the high result
emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
// Get the high result by shifting dst.
emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
}
else
{
assert(attr == EA_8BYTE);
// Compute the high result.
emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);

emitIns_R_I(INS_cmp, EA_8BYTE, extraReg, 0);
codeGen->genCheckOverflow(dst);
// Now multiply without skewing the high result.
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
}

// Now multiply without skewing the high result if no overflow.
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
// zero-sign bit comparision to detect overflow.
emitIns_R_I(INS_cmp, attr, extraReg, 0);
}
else
{
// Make sure that we have an internal register
assert(genCountBits(dst->gtRsvdRegs) == 2);

// There will be two bits set in tmpRegsMask.
// Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
int bitShift = 0;
if (attr == EA_4BYTE)
{
// Compute 8 byte results from 4 byte by 4 byte multiplication.
emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);

// Make sure the two registers are not the same.
assert(extraReg != dst->gtRegNum);
// Get the high result by shifting dst.
emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);

// Save the high result in a temporary register
emitIns_R_R_R(ins2, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
bitShift = 31;
}
else
{
assert(attr == EA_8BYTE);
// Save the high result in a temporary register.
emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);

// Now multiply without skewing the high result.
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
// Now multiply without skewing the high result.
emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);

emitIns_R_R_I(INS_cmp, EA_8BYTE, extraReg, dst->gtRegNum, 63, INS_OPTS_ASR);
bitShift = 63;
}

codeGen->genCheckOverflow(dst);
// Sign bit comparision to detect overflow.
emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
}
}
else
Expand All @@ -10902,7 +10923,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
}
}

if (dst->gtOverflowEx() && !isMulOverflow)
if (dst->gtOverflowEx())
{
assert(!varTypeIsFloating(dst));
codeGen->genCheckOverflow(dst);
Expand Down
7 changes: 1 addition & 6 deletions src/jit/lowerarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
break;

case GT_MUL:
if ((tree->gtFlags & GTF_UNSIGNED) != 0)
{
// unsigned mul should only need one register
info->internalIntCount = 1;
}
else if (tree->gtOverflow())
if (tree->gtOverflow())
{
// Need a register different from target reg to check
// for signed overflow.

This comment has been minimized.

Copy link
@sivarv

sivarv Mar 2, 2016

Minor - "to check for overflow"
(word "signed" can be dropped now).

This comment has been minimized.

Copy link
@kyulee1

kyulee1 Mar 2, 2016

Author Owner

Will address it in the next checkin.

Expand Down
2 changes: 1 addition & 1 deletion tests/arm64/Tests.lst
Original file line number Diff line number Diff line change
Expand Up @@ -5247,7 +5247,7 @@ WorkingDir=JIT\Regression\CLR-x86-JIT\V1-M09.5-PDC\b32093\b32093
MaxAllowedDurationSeconds=600
HostStyle=Any
Expected=100
Categories=JIT;EXPECTED_FAIL;NEED_TRIAGE
Categories=JIT;EXPECTED_PASS
[lclfldrem_cs_ro.exe_2875]
RelativePath=JIT\Directed\coverage\oldtests\lclfldrem_cs_ro\lclfldrem_cs_ro.exe
WorkingDir=JIT\Directed\coverage\oldtests\lclfldrem_cs_ro
Expand Down

1 comment on commit 445a810

@sivarv
Copy link

@sivarv sivarv commented on 445a810 Mar 2, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good.

Please sign in to comment.