Skip to content

Commit

Permalink
When both x/y and x%y are needed (x and y both scalar integer), compute
Browse files Browse the repository at this point in the history
both results with a single div or idiv instruction. This uses new X86ISD
nodes for DIV and IDIV which are introduced during the legalize phase
so that the SelectionDAG's CSE can automatically eliminate redundant
computations.

llvm-svn: 42308
  • Loading branch information
Dan Gohman committed Sep 25, 2007
1 parent 2826f6e commit 3159968
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 25 deletions.
65 changes: 40 additions & 25 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1162,12 +1162,9 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
return NULL;
}

case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: {
bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
case X86ISD::DIV:
case X86ISD::IDIV: {
bool isSigned = Opcode == X86ISD::IDIV;
if (!isSigned)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
Expand Down Expand Up @@ -1275,31 +1272,49 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}

unsigned Reg = isDiv ? LoReg : HiReg;
SDOperand Result;
if (Reg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
Chain = Result.getValue(1);
Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
MVT::i8, Result, SRIdx), 0);
} else {
Result = CurDAG->getCopyFromReg(Chain, Reg, NVT, InFlag);
// Copy the division (low) result, if it is needed.
if (!N.getValue(0).use_empty()) {
SDOperand Result = CurDAG->getCopyFromReg(Chain, LoReg, NVT, InFlag);
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
#endif
}
// Copy the remainder (high) result, if it is needed.
if (!N.getValue(1).use_empty()) {
SDOperand Result;
if (HiReg == X86::AH && Subtarget->is64Bit()) {
// Prevent use of AH in a REX instruction by referencing AX instead.
// Shift it down 8 bits.
Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
MVT::i8, Result, SRIdx), 0);
} else {
Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
}
ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
#endif
}
ReplaceUses(N.getValue(0), Result);
if (foldedLoad)
ReplaceUses(N1.getValue(1), Chain);

#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DEBUG(Result.Val->dump(CurDAG));
DOUT << "\n";
Indent -= 2;
#endif

Expand Down
43 changes: 43 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,27 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
}

// Divide and remainder are lowered to use div or idiv in legalize in
// order to expose the intermediate computations to trivial CSE. This is
// most noticeable when both x/y and x%y are being computed; they can be
// done with a single div or idiv.
setOperationAction(ISD::SDIV , MVT::i8 , Custom);
setOperationAction(ISD::UDIV , MVT::i8 , Custom);
setOperationAction(ISD::SREM , MVT::i8 , Custom);
setOperationAction(ISD::UREM , MVT::i8 , Custom);
setOperationAction(ISD::SDIV , MVT::i16 , Custom);
setOperationAction(ISD::UDIV , MVT::i16 , Custom);
setOperationAction(ISD::SREM , MVT::i16 , Custom);
setOperationAction(ISD::UREM , MVT::i16 , Custom);
setOperationAction(ISD::SDIV , MVT::i32 , Custom);
setOperationAction(ISD::UDIV , MVT::i32 , Custom);
setOperationAction(ISD::SREM , MVT::i32 , Custom);
setOperationAction(ISD::UREM , MVT::i32 , Custom);
setOperationAction(ISD::SDIV , MVT::i64 , Custom);
setOperationAction(ISD::UDIV , MVT::i64 , Custom);
setOperationAction(ISD::SREM , MVT::i64 , Custom);
setOperationAction(ISD::UREM , MVT::i64 , Custom);

setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
Expand Down Expand Up @@ -3393,6 +3414,22 @@ SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}

/// LowerIntegerDivOrRem - Custom lowering for ISD::SDIV, ISD::UDIV, ISD::SREM
/// and ISD::UREM. The operation is rebuilt as a two-result X86ISD::IDIV
/// (for SDIV/SREM) or X86ISD::DIV (otherwise) node over the same two operands.
/// For a divide opcode the value produced by getNode is returned unchanged;
/// for a remainder opcode result number 1 of that same node is returned.
SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) {
  const unsigned OrigOpcode = Op.getOpcode();
  const bool WantsQuotient = OrigOpcode == ISD::SDIV || OrigOpcode == ISD::UDIV;
  const bool Signed = OrigOpcode == ISD::SDIV || OrigOpcode == ISD::SREM;

  // Pick the target node: IDIV for the signed opcodes, DIV for the rest.
  unsigned DivRemOpcode = X86ISD::DIV;
  if (Signed)
    DivRemOpcode = X86ISD::IDIV;

  // Both results of the combined node carry the original node's value type.
  const MVT::ValueType VT = Op.getValueType();
  SDOperand Operands[] = { Op.getOperand(0), Op.getOperand(1) };
  SDOperand DivRem =
    DAG.getNode(DivRemOpcode, DAG.getVTList(VT, VT), Operands, 2);

  // A remainder request selects result 1 of the node; a divide request uses
  // the operand returned by getNode as-is.
  if (!WantsQuotient)
    return SDOperand(DivRem.Val, 1);
  return DivRem;
}

SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
Op.getOperand(0).getValueType() >= MVT::i16 &&
Expand Down Expand Up @@ -4668,6 +4705,10 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShift(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: return LowerIntegerDivOrRem(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
Expand Down Expand Up @@ -4751,6 +4792,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::DIV: return "X86ISD::DIV";
case X86ISD::IDIV: return "X86ISD::IDIV";
}
}

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ namespace llvm {
/// in order to obtain suitable precision.
FRSQRT, FRCP,

/// DIV, IDIV - Unsigned and signed integer division and remainder.
///
DIV, IDIV,

// Thread Local Storage
TLSADDR, THREAD_POINTER,

Expand Down Expand Up @@ -420,6 +424,7 @@ namespace llvm {
SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerShift(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG);
Expand Down
58 changes: 58 additions & 0 deletions llvm/test/CodeGen/X86/divrem.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep div | count 8

; Signed i64: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @si64(i64 %x, i64 %y, i64* %p, i64* %q) {
  %quot = sdiv i64 %x, %y
  %rem = srem i64 %x, %y
  store i64 %quot, i64* %p
  store i64 %rem, i64* %q
  ret void
}
; Signed i32: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @si32(i32 %x, i32 %y, i32* %p, i32* %q) {
  %quot = sdiv i32 %x, %y
  %rem = srem i32 %x, %y
  store i32 %quot, i32* %p
  store i32 %rem, i32* %q
  ret void
}
; Signed i16: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @si16(i16 %x, i16 %y, i16* %p, i16* %q) {
  %quot = sdiv i16 %x, %y
  %rem = srem i16 %x, %y
  store i16 %quot, i16* %p
  store i16 %rem, i16* %q
  ret void
}
; Signed i8: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) {
  %quot = sdiv i8 %x, %y
  %rem = srem i8 %x, %y
  store i8 %quot, i8* %p
  store i8 %rem, i8* %q
  ret void
}
; Unsigned i64: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @ui64(i64 %x, i64 %y, i64* %p, i64* %q) {
  %quot = udiv i64 %x, %y
  %rem = urem i64 %x, %y
  store i64 %quot, i64* %p
  store i64 %rem, i64* %q
  ret void
}
; Unsigned i32: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @ui32(i32 %x, i32 %y, i32* %p, i32* %q) {
  %quot = udiv i32 %x, %y
  %rem = urem i32 %x, %y
  store i32 %quot, i32* %p
  store i32 %rem, i32* %q
  ret void
}
; Unsigned i16: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @ui16(i16 %x, i16 %y, i16* %p, i16* %q) {
  %quot = udiv i16 %x, %y
  %rem = urem i16 %x, %y
  store i16 %quot, i16* %p
  store i16 %rem, i16* %q
  ret void
}
; Unsigned i8: compute both the quotient and the remainder of %x and %y and
; store each through its own pointer so that both values are used.
define void @ui8(i8 %x, i8 %y, i8* %p, i8* %q) {
  %quot = udiv i8 %x, %y
  %rem = urem i8 %x, %y
  store i8 %quot, i8* %p
  store i8 %rem, i8* %q
  ret void
}

0 comments on commit 3159968

Please sign in to comment.