Skip to content

Commit

Permalink
[clang][OpenCL][CodeGen][AMDGPU] Do not use private as the default …
Browse files Browse the repository at this point in the history
…AS for when `generic` is available (#112442)

Currently, for AMDGPU, when compiling for OpenCL, we unconditionally use
`private` as the default address space. This is wrong for cases where
the `generic` address space is available, and is corrected via this
patch. In general, this AS map abuse is a bad hack and we should re-work
it altogether, but at least after this patch we will stop being
incorrect for e.g. OpenCL 2.0.
  • Loading branch information
AlexVlx authored Oct 22, 2024
1 parent aea60ab commit 6e0b003
Show file tree
Hide file tree
Showing 20 changed files with 1,154 additions and 549 deletions.
6 changes: 3 additions & 3 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
TargetInfo::adjust(Diags, Opts);
// ToDo: There are still a few places using default address space as private
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
// can be removed from the following line.
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
// address space in OpenCL, which needs to be cleaned up, then the references
// to OpenCL can be removed from the following line.
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
!isAMDGCN(getTriple()));
}

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGBlocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(
*BlockInfo, D->getName(), argNum,
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
Builder);
}
}

Expand Down
11 changes: 9 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5853,8 +5853,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
// The EmitLifetime* pair expect a naked Alloca as their last argument,
// however for cases where the default AS is not the Alloca AS, Tmp is
// actually the Alloca ascasted to the default AS, hence the
// stripPointerCasts()
llvm::Value *Alloca = TmpPtr->stripPointerCasts();
llvm::Value *TmpSize = EmitLifetimeStart(
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
Expand All @@ -5870,7 +5875,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
return std::tie(ElemPtr, TmpSize, TmpPtr);
// Return the Alloca itself rather than a potential ascast as this is only
// used by the paired EmitLifetimeEnd.
return std::tie(ElemPtr, TmpSize, Alloca);
};

// Could have events and/or varargs.
Expand Down
169 changes: 99 additions & 70 deletions clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Large diffs are not rendered by default.

36 changes: 20 additions & 16 deletions clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8
// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
// CL20-NEXT: ret void
//
Expand Down Expand Up @@ -54,25 +55,27 @@ void func1(int *x) {
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8
// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]]
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
// CL20-NEXT: ret void
//
void func2(void) {
int lv1;
lv1 = 1;

int lv2 = 2;

int la[100];
Expand All @@ -99,7 +102,8 @@ void func2(void) {
// CL20-SAME: ) #[[ATTR0]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
// CL20-NEXT: ret void
//
void func3(void) {
Expand Down
Loading

0 comments on commit 6e0b003

Please sign in to comment.