Skip to content

Commit

Permalink
Merge between components.
Browse files Browse the repository at this point in the history
Change-Id: If967c8f14d5d9c63fa9f118d7f6c6deecc598d36
  • Loading branch information
Graban, Daniel authored and sys_zuul committed Jul 29, 2019
1 parent 4837bfc commit e2936cf
Show file tree
Hide file tree
Showing 9 changed files with 187 additions and 22 deletions.
2 changes: 2 additions & 0 deletions IGC/Compiler/CISACodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set(IGC_BUILD__SRC__CISACodeGen_Common
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderCodeGen.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderLowering.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ComputeShaderLowering.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/WIAnalysis.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/POSH_RemoveNonPositionOutput.cpp"
Expand Down Expand Up @@ -166,6 +167,7 @@ set(IGC_BUILD__HDR__CISACodeGen_Common
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderCodeGen.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderLowering.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ComputeShaderLowering.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/WIAnalysis.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/POSH_RemoveNonPositionOutput.h"
Expand Down
42 changes: 24 additions & 18 deletions IGC/Compiler/CISACodeGen/ComputeShaderCodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,27 +125,31 @@ void CComputeShader::ParseShaderSpecificOpcode(llvm::Instruction* inst)
}
}

void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& threadPayloadSize)
void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& curbeTotalDataLength, uint& curbeReadLength)
{
typedef uint16_t ThreadPayloadEntry;

// Find the max thread group dimension
const OctEltUnit SIZE_OF_DQWORD = OctEltUnit(2);
const OctEltUnit SIZE_OF_OWORD = OctEltUnit(1);
uint numberOfId = GetNumberOfId();
uint dimX = numLanes(m_dispatchSize);
uint dimY = (iSTD::Align(m_threadGroupSize, dimX)/dimX) * numberOfId;

typedef uint ThreadPayloadEntry;

uint alignedVal = EltUnit(SIZE_OF_DQWORD).Count() * sizeof(DWORD); // Oct Element is 8 DWORDS
// dimX must align to alignment_X bytes (one GRF)
uint alignment_X = EltUnit(SIZE_OF_OWORD).Count() * sizeof(DWORD);
uint dimX_aligned = iSTD::Align(dimX * sizeof(ThreadPayloadEntry), alignment_X) / sizeof(ThreadPayloadEntry);
uint dimY = (iSTD::Align(m_threadGroupSize, dimX) / dimX) * numberOfId;
curbeReadLength = dimX_aligned * numberOfId * sizeof(ThreadPayloadEntry) / alignment_X;

uint alignedVal = EltUnit(SIZE_OF_DQWORD).Count() * sizeof(ThreadPayloadEntry); // Oct Element is 8 Entries
// m_NOSBufferSize is the additional space for cross-thread constant data (constants set by driver).
threadPayloadSize = iSTD::Align( dimX * dimY * sizeof( ThreadPayloadEntry ) + m_NOSBufferSize, alignedVal );
curbeTotalDataLength = iSTD::Align(dimX_aligned * dimY * sizeof(ThreadPayloadEntry) + m_NOSBufferSize, alignedVal);

assert(pThreadPayload == nullptr && "Thread payload should be a null variable");

unsigned threadPayloadEntries = threadPayloadSize / sizeof(ThreadPayloadEntry);
unsigned threadPayloadEntries = curbeTotalDataLength / sizeof(ThreadPayloadEntry);

ThreadPayloadEntry* pThreadPayloadMem =
(ThreadPayloadEntry*)IGC::aligned_malloc(threadPayloadEntries* sizeof(ThreadPayloadEntry), 16);
(ThreadPayloadEntry*)IGC::aligned_malloc(threadPayloadEntries * sizeof(ThreadPayloadEntry), 16);
std::fill(pThreadPayloadMem, pThreadPayloadMem + threadPayloadEntries, 0);

pThreadPayload = pThreadPayloadMem;
Expand All @@ -169,17 +173,17 @@ void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& threa
uint lane = 0;
if(m_pThread_ID_in_Group_X)
{
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadX;
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadX;
lane++;
}
if(m_pThread_ID_in_Group_Y)
{
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadY;
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadY;
lane++;
}
if(m_pThread_ID_in_Group_Z)
{
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadZ;
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadZ;
lane++;
}

Expand Down Expand Up @@ -259,19 +263,19 @@ CVariable* CComputeShader::CreateThreadIDinGroup(uint channelNum)
case 0:
if(m_pThread_ID_in_Group_X == nullptr)
{
m_pThread_ID_in_Group_X = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
m_pThread_ID_in_Group_X = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
}
return m_pThread_ID_in_Group_X;
case 1:
if(m_pThread_ID_in_Group_Y == nullptr)
{
m_pThread_ID_in_Group_Y = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
m_pThread_ID_in_Group_Y = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
}
return m_pThread_ID_in_Group_Y;
case 2:
if(m_pThread_ID_in_Group_Z == nullptr)
{
m_pThread_ID_in_Group_Z = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
m_pThread_ID_in_Group_Z = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
}
return m_pThread_ID_in_Group_Z;
default:
Expand Down Expand Up @@ -335,6 +339,7 @@ void CComputeShader::AllocatePayload()
{
AllocateInput(m_pThread_ID_in_Group_X, offset, i);
offset += m_pThread_ID_in_Group_X->GetSize();
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_X->GetAlign()]);
}
}

Expand All @@ -344,6 +349,7 @@ void CComputeShader::AllocatePayload()
{
AllocateInput(m_pThread_ID_in_Group_Y, offset, i);
offset += m_pThread_ID_in_Group_Y->GetSize();
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_Y->GetAlign()]);
}
}

Expand All @@ -353,6 +359,7 @@ void CComputeShader::AllocatePayload()
{
AllocateInput(m_pThread_ID_in_Group_Z, offset, i);
offset += m_pThread_ID_in_Group_Z->GetSize();
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_Z->GetAlign()]);
}
}

Expand Down Expand Up @@ -466,8 +473,6 @@ void CComputeShader::FillProgram(SComputeShaderKernelProgram* pKernelProgram)
pKernelProgram->FloatingPointMode = USC::GFX3DSTATE_FLOATING_POINT_IEEE_754;
pKernelProgram->SingleProgramFlow = USC::GFX3DSTATE_PROGRAM_FLOW_MULTIPLE;
pKernelProgram->CurbeReadOffset = 0;
pKernelProgram->CurbeReadLength = GetNumberOfId() * (numLanes(m_dispatchSize) / numLanes(SIMDMode::SIMD8));

pKernelProgram->PhysicalThreadsInGroup = static_cast<int>(
std::ceil((static_cast<float>(m_threadGroupSize) /
static_cast<float>((numLanes(m_dispatchSize))))));
Expand All @@ -487,7 +492,8 @@ void CComputeShader::FillProgram(SComputeShaderKernelProgram* pKernelProgram)
pKernelProgram->ThreadPayloadData = nullptr;
CreateThreadPayloadData(
pKernelProgram->ThreadPayloadData,
pKernelProgram->CurbeTotalDataLength);
pKernelProgram->CurbeTotalDataLength,
pKernelProgram->CurbeReadLength);

pKernelProgram->ThreadGroupSize = m_threadGroupSize;

Expand Down
2 changes: 1 addition & 1 deletion IGC/Compiler/CISACodeGen/ComputeShaderCodeGen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class CComputeShader : public CShader
void FillProgram(SComputeShaderKernelProgram* pKernelProgram);
void PreCompile() override;
void ExtractGlobalVariables() override;
void CreateThreadPayloadData(void* & pThreadPayload, uint& threadPayloadSize);
void CreateThreadPayloadData(void* & pThreadPayload, uint& curbeTotalDataLength, uint& curbeReadLength);
uint GetNumberOfId();
void ParseShaderSpecificOpcode(llvm::Instruction* inst) override;

Expand Down
116 changes: 116 additions & 0 deletions IGC/Compiler/CISACodeGen/ComputeShaderLowering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*===================== begin_copyright_notice ==================================
Copyright (c) 2017 Intel Corporation
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
======================= end_copyright_notice ==================================*/
#include "ComputeShaderLowering.hpp"
#include "IGCPassSupport.h"
#include "GenISAIntrinsics/GenIntrinsicInst.h"
#include "AdaptorCommon/ImplicitArgs.hpp"
#include "common/LLVMWarningsPush.hpp"
#include "llvm/IR/Function.h"
#include "common/LLVMWarningsPop.hpp"

using namespace llvm;
using namespace IGC;
using namespace IGC::IGCMD;

class ComputeShaderLowering : public FunctionPass
{
public:
ComputeShaderLowering() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) override;
virtual llvm::StringRef getPassName() const override
{
return "ComputeShaderLowering";
}
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override
{
AU.setPreservesCFG();
AU.addRequired<CodeGenContextWrapper>();
}
static char ID;
protected:
Function* m_function = nullptr;
void shortenThreadID(GenIntrinsicInst& inst, Function &F);
};

char ComputeShaderLowering::ID = 0;

/// Visits every instruction in every basic block of F and forwards each
/// GenISA_DCL_SystemValue intrinsic call to shortenThreadID().
///
/// @param F  Function being processed.
/// @return   Always true: the function is reported as modified whether or
///           not any call was actually rewritten.
bool ComputeShaderLowering::runOnFunction(Function &F)
{
    for (auto& block : F)
    {
        for (auto& instruction : block)
        {
            GenIntrinsicInst* const intrinsic = dyn_cast<GenIntrinsicInst>(&instruction);
            if (intrinsic == nullptr)
            {
                continue;
            }
            if (intrinsic->getIntrinsicID() != GenISAIntrinsic::GenISA_DCL_SystemValue)
            {
                continue;
            }

            // New instructions are inserted in front of the visited one,
            // so the traversal does not encounter them again.
            shortenThreadID(*intrinsic, F);
        }
    }

    return true;
}

/// Re-declares a thread-ID-in-group system value as a 16-bit integer.
///
/// If `inst` declares THREAD_ID_IN_GROUP_X/Y/Z, a new call to the i16
/// overload of GenISA_DCL_SystemValue is inserted in front of it, the result
/// is zero-extended (or truncated) to an integer as wide as the original
/// result type, bit-cast to that type, and substituted for all uses of
/// `inst`. Any other system-value usage is left untouched.
///
/// The original call is not erased here: the caller is still iterating over
/// the enclosing basic block, so it is merely left without uses.
///
/// @param inst  GenISA_DCL_SystemValue call to inspect; operand 0 must be a
///              constant integer holding the SGVUsage value.
/// @param F     Function containing `inst`; used to reach the parent module.
void ComputeShaderLowering::shortenThreadID(GenIntrinsicInst& inst, Function &F)
{
    const SGVUsage usage =
        static_cast<SGVUsage>(llvm::cast<llvm::ConstantInt>(inst.getOperand(0))->getZExtValue());
    const bool isThreadIdInGroup =
        THREAD_ID_IN_GROUP_X == usage ||
        THREAD_ID_IN_GROUP_Y == usage ||
        THREAD_ID_IN_GROUP_Z == usage;
    if (!isThreadIdInGroup)
    {
        return;
    }

    IRBuilder<> builder(&inst);
    llvm::Type* const originalTy = inst.getType();

    // Already the 16-bit form (e.g. the pass ran before, or the front end
    // emitted it directly): nothing to shorten, and the i32 -> i16 bitcast
    // the old code would have produced is not a valid cast.
    if (originalTy == builder.getInt16Ty())
    {
        return;
    }

    // A bitcast requires source and destination to have the same bit width,
    // so size the intermediate integer after the original result type
    // instead of assuming it is always 32 bits wide.
    const unsigned originalBits = static_cast<unsigned>(originalTy->getPrimitiveSizeInBits());
    if (originalBits == 0)
    {
        // Not a primitive-sized type; there is no valid bitcast target.
        return;
    }

    llvm::Module* module = F.getParent();
    llvm::Value* vSGV = builder.getInt32(usage);
    llvm::Function* funcSGV = llvm::GenISAIntrinsic::getDeclaration(
        module, GenISAIntrinsic::GenISA_DCL_SystemValue, builder.getInt16Ty());

    llvm::Value* vSGVCreate = builder.CreateCall(funcSGV, vSGV);
    vSGVCreate = builder.CreateZExtOrTrunc(vSGVCreate, builder.getIntNTy(originalBits));
    vSGVCreate = builder.CreateBitCast(vSGVCreate, originalTy);
    inst.replaceAllUsesWith(vSGVCreate);
}

namespace IGC {
// Parameters handed to the IGC_INITIALIZE_PASS_* macros below.
// PASS_FLAG is the pass's flag string; PASS_DESCRIPTION is its description text.
#define PASS_FLAG "igc-compute-shader-lowering"
#define PASS_DESCRIPTION "This is the compute shader lowering pass "
#define PASS_CFG_ONLY false
// NOTE(review): this pass rewrites IR (see shortenThreadID), so flagging it
// as an analysis looks unintended -- confirm whether this should be false.
#define PASS_ANALYSIS true
// Pass initialization block; CodeGenContextWrapper is listed as a dependency,
// matching the addRequired<CodeGenContextWrapper>() in getAnalysisUsage().
IGC_INITIALIZE_PASS_BEGIN(ComputeShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
IGC_INITIALIZE_PASS_END(ComputeShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)

// Factory: returns a newly heap-allocated ComputeShaderLowering pass.
// Presumably the pass manager it is added to takes ownership -- TODO confirm.
FunctionPass* CreateComputeShaderLowering()
{
return new ComputeShaderLowering();
}
}
34 changes: 34 additions & 0 deletions IGC/Compiler/CISACodeGen/ComputeShaderLowering.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*===================== begin_copyright_notice ==================================
Copyright (c) 2017 Intel Corporation
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
======================= end_copyright_notice ==================================*/
#pragma once
#include "common/LLVMWarningsPush.hpp"
#include <llvm/Pass.h>
#include "common/LLVMWarningsPop.hpp"

namespace IGC
{
/// Creates a new instance of the compute shader lowering function pass.
/// The returned object is allocated with `new`; presumably the pass manager
/// it is added to takes ownership -- TODO confirm.
llvm::FunctionPass* CreateComputeShaderLowering();
}
3 changes: 3 additions & 0 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6779,18 +6779,21 @@ void EmitPass::emitCSSGV(GenIntrinsicInst* inst)
}
case THREAD_ID_IN_GROUP_X:
{
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(0);
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
break;
}
case THREAD_ID_IN_GROUP_Y:
{
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(1);
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
break;
}
case THREAD_ID_IN_GROUP_Z:
{
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(2);
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
break;
Expand Down
4 changes: 4 additions & 0 deletions IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "Compiler/CISACodeGen/LowerGEPForPrivMem.hpp"
#include "Compiler/CISACodeGen/POSH_RemoveNonPositionOutput.h"
#include "Compiler/CISACodeGen/RegisterEstimator.hpp"
#include "Compiler/CISACodeGen/ComputeShaderLowering.hpp"

#include "Compiler/CISACodeGen/SLMConstProp.hpp"
#include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
Expand Down Expand Up @@ -650,6 +651,9 @@ inline void AddLegalizationPasses(CodeGenContext &ctx, IGCPassManager& mpm)
case ShaderType::DOMAIN_SHADER:
mpm.add(createDomainShaderLoweringPass());
break;
case ShaderType::COMPUTE_SHADER:
mpm.add(CreateComputeShaderLowering());
break;
default:
break;
}
Expand Down
2 changes: 1 addition & 1 deletion IGC/GenISAIntrinsics/Intrinsic_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@
# (dwordAttributeOrSetupIndex, e_interpolation_PSOnly)->anyvector
"GenISA_DCL_ShaderInputVec": ["anyvector",["int","int"],"NoMem"],
"GenISA_DCL_GSinputVec": ["float4",["int","int"],"NoMem"],
"GenISA_DCL_SystemValue": ["anyfloat",["int"],"NoMem"],
"GenISA_DCL_SystemValue": ["any:float",["int"],"NoMem"],
"GenISA_SampleOffsetX": ["float",["int"],"NoMem"],
"GenISA_SampleOffsetY": ["float",["int"],"NoMem"],
"GenISA_PixelPositionX": ["short",[],"NoMem"],
Expand Down
4 changes: 2 additions & 2 deletions visa/Common_ISA.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class G4_Declare;
#define COMMON_ISA_GRF_REG_SIZE (getGRFSize()) /// # of bytes in a CISA GRF register

#define COMMON_ISA_MAX_ADDRREG_WIDTH 8
#define COMMON_ISA_MAX_FILENAME_LENGTH 255
#define COMMON_ISA_MAX_FILENAME_LENGTH 1023

#define COMMON_ISA_MAX_KERNEL_NAME_LEN 255
#define COMMON_ISA_MAX_ADDRESS_OFFSET 4096
Expand Down Expand Up @@ -693,7 +693,7 @@ typedef struct _CISA_INST
dst = *((type *) &buf[byte_pos]); \
byte_pos += sizeof(type);

#define STRING_LEN 512
#define STRING_LEN 1024

struct Common_ISA_Attribute{
char* name;
Expand Down

0 comments on commit e2936cf

Please sign in to comment.