Skip to content

Commit

Permalink
Backout of f6b3af5 due to Functional Regression
Browse files (browse the repository at this point in the history)
Change-Id: I26fcbe398f839e67a8172cf84be3e80833e7810b
  • Loading branch information
sys-d3djenkins authored and gfxbot committed Aug 30, 2018
1 parent c31fbdd commit b662d85
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 20 deletions.
10 changes: 2 additions & 8 deletions IGC/Compiler/CISACodeGen/LowerGEPForPrivMem.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -199,14 +199,6 @@ unsigned int LowerGEPForPrivMem::extractAllocaSize(llvm::AllocaInst* pAlloca)

bool LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst* pAlloca)
{
auto WI = &getAnalysis<WIAnalysis>();
bool isUniformAlloca = WI->whichDepend(pAlloca) == WIAnalysis::UNIFORM;
if(isUniformAlloca)
{
IRBuilder<> builder(pAlloca);
MDNode* node = MDNode::get(pAlloca->getContext(), ConstantAsMetadata::get(builder.getInt1(true)));
pAlloca->setMetadata("uniform", node);
}
unsigned int allocaSize = extractAllocaSize(pAlloca);
unsigned int allowedAllocaSizeInBytes = MAX_ALLOCA_PROMOTE_GRF_NUM * 4;

Expand All @@ -227,6 +219,8 @@ bool LowerGEPForPrivMem::CheckIfAllocaPromotable(llvm::AllocaInst* pAlloca)
{
return false;
}
auto WI = &getAnalysis<WIAnalysis>();
bool isUniformAlloca = WI->whichDepend(pAlloca) == WIAnalysis::UNIFORM;
if(isUniformAlloca)
{
// Heuristic: for uniform alloca we divide the size by 8 to adjust the pressure
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -750,12 +750,12 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
Function *stackAllocaFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_StackAlloca);
Instruction *simdLaneId16 = CallInst::Create(simdLaneIdFunc, VALUE_NAME("simdLaneId16"), pEntryPoint);
Value *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
Instruction *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
Instruction *simdSize = CallInst::Create(simdSizeFunc, VALUE_NAME("simdSize"), pEntryPoint);
for (auto pAI : allocaInsts)
{
assert(!pAI->use_empty() && "Should not reach here with alloca instruction that has no usage!");
bool isUniform = pAI->getMetadata("uniform") != nullptr;

llvm::IRBuilder<> builder(pAI);
IF_DEBUG_INFO(builder.SetCurrentDebugLocation(emptyDebugLoc));

Expand All @@ -764,8 +764,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize(pAI);

Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
Value* increment = isUniform ? builder.getInt32(0) : simdLaneId;
Value* perLaneOffset = builder.CreateMul(increment, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
Value* perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
Value* stackAlloca = builder.CreateCall(stackAllocaFunc, totalOffset, VALUE_NAME("stackAlloca"));
Value* privateBuffer = builder.CreatePointerCast(stackAlloca, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));
Expand All @@ -787,7 +786,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
// PrivateMemoryUsageAnalysis pass, no need to run AddImplicitArgs pass.

Instruction *simdLaneId16 = CallInst::Create(simdLaneIdFunc, VALUE_NAME("simdLaneId16"), pEntryPoint);
Value *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
Instruction *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
Instruction *simdSize = CallInst::Create(simdSizeFunc, VALUE_NAME("simdSize"), pEntryPoint);

Argument* r0Arg = implicitArgs.getArgInFunc(*m_currFunction, ImplicitArg::R0);
Expand All @@ -798,7 +797,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
for (auto pAI : allocaInsts)
{
assert(!pAI->use_empty() && "Should not reach here with alloca instruction that has no usage!");
bool isUniform = pAI->getMetadata("uniform") != nullptr;

llvm::IRBuilder<> builder(pAI);
// Post upgrade to LLVM 3.5.1, it was found that inliner propagates debug info of callee
// in to the alloca. Further, those allocas are somehow hoisted to the top of program.
Expand Down Expand Up @@ -858,8 +857,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)


Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
Value* perLaneOffset = isUniform ? builder.getInt32(0) : simdLaneId;
perLaneOffset = builder.CreateMul(perLaneOffset, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
Value* perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
Value* threadOffset = builder.CreateAdd(privateBase, totalOffset, VALUE_NAME(pAI->getName() + ".threadOffset"));
Value* privateBufferPTR = builder.CreateIntToPtr(threadOffset, Type::getInt8Ty(C)->getPointerTo(ADDRESS_SPACE_PRIVATE), VALUE_NAME(pAI->getName() + ".privateBufferPTR"));
Expand Down Expand Up @@ -905,7 +903,7 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)
ConstantInt *totalPrivateMemPerWIValue = ConstantInt::get(typeInt32, totalPrivateMemPerWI);

Instruction *simdLaneId16 = CallInst::Create(simdLaneIdFunc, VALUE_NAME("simdLaneId16"), pEntryPoint);
Value* simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
Instruction *simdLaneId = ZExtInst::CreateIntegerCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"), pEntryPoint);
Instruction *simdSize = CallInst::Create(simdSizeFunc, VALUE_NAME("simdSize"), pEntryPoint);
BinaryOperator* totalPrivateMemPerThread = BinaryOperator::CreateMul(simdSize, totalPrivateMemPerWIValue, VALUE_NAME("totalPrivateMemPerThread"), pEntryPoint);
ExtractElementInst* r0_5 = ExtractElementInst::Create(r0Arg, ConstantInt::get(typeInt32, 5), VALUE_NAME("r0.5"), pEntryPoint);
Expand Down Expand Up @@ -938,15 +936,14 @@ bool PrivateMemoryResolution::resolveAllocaInstuctions(bool stackCall)

llvm::IRBuilder<> builder(pAI);
IF_DEBUG_INFO(builder.SetCurrentDebugLocation(emptyDebugLoc));
bool isUniform = pAI->getMetadata("uniform") != nullptr;

// Get buffer information from the analysis
unsigned int scalarBufferOffset = m_ModAllocaInfo->getBufferOffset(pAI);
unsigned int bufferSize = m_ModAllocaInfo->getBufferSize(pAI);

Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
Value* bufferOffsetForThread = builder.CreateAdd(perThreadOffset, bufferOffset, VALUE_NAME(pAI->getName() + ".bufferOffsetForThread"));
Value* perLaneOffset = isUniform ? builder.getInt32(0) : simdLaneId;
perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
Value* perLaneOffset = builder.CreateMul(simdLaneId, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
Value* totalOffset = builder.CreateAdd(bufferOffsetForThread, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
Value* privateBufferGEP = builder.CreateGEP(privateMemArg, totalOffset, VALUE_NAME(pAI->getName() + ".privateBufferGEP"));
Value* privateBuffer = builder.CreatePointerCast(privateBufferGEP, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));
Expand Down

0 comments on commit b662d85

Please sign in to comment.