From 5204fde32431f9b96bd2678f2ef26d7be97348f3 Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Sat, 8 Jun 2024 22:17:12 +0200 Subject: [PATCH] [AMDGPU] Swap range metadata to attribute for workitem id. --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 14 ++- .../CodeGen/AMDGPU/amdgpu.private-memory.ll | 21 ++--- .../CodeGen/AMDGPU/private-memory-r600.ll | 13 +-- ...promote-alloca-strip-abi-opt-attributes.ll | 6 +- .../AMDGPU/promote-alloca-to-lds-icmp.ll | 92 +++++++++---------- 5 files changed, 72 insertions(+), 74 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 94ee4ac78142d3..0751c8dc8b8bf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -560,10 +560,16 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { else ++MaxSize; - MDBuilder MDB(I->getContext()); - MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize), - APInt(32, MaxSize)); - I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); + APInt Lower{32, MinSize}; + APInt Upper{32, MaxSize}; + if (auto *CI = dyn_cast<CallBase>(I)) { + ConstantRange Range(Lower, Upper); + CI->addRangeRetAttr(Range); + } else { + MDBuilder MDB(I->getContext()); + MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper); + I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); + } return true; } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index d6841d40f2313e..bd61558905f634 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -52,9 +52,9 @@ ; HSAOPT: [[LDZU:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]], align 4, !range !2, !invariant.load !1 ; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16 -; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !3 -; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = 
call i32 @llvm.amdgcn.workitem.id.y(), !range !3 -; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !3 +; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() ; HSAOPT: [[Y_SIZE_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[EXTRACTY]], [[LDZU]] ; HSAOPT: [[YZ_X_XID:%[0-9]+]] = mul i32 [[Y_SIZE_X_Z_SIZE]], [[WORKITEM_ID_X]] @@ -68,11 +68,11 @@ ; HSAOPT: %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(3) [[LOCAL_GEP]], i32 0, i32 1 -; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !1 -; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !1 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !2 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !2 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !2 +; NOHSAOPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.y() +; NOHSAOPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.z() +; NOHSAOPT: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; NOHSAOPT: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; NOHSAOPT: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4, addrspace(5) @@ -533,8 +533,3 @@ attributes #1 = { nounwind "amdgpu-flat-work-group-size"="1,256" } !99 = !{i32 1, !"amdhsa_code_object_version", i32 400} ; HSAOPT: !1 = !{} -; HSAOPT: !2 = !{i32 0, i32 257} -; HSAOPT: !3 = !{i32 0, i32 256} - -; NOHSAOPT: !1 = !{i32 0, i32 257} -; NOHSAOPT: !2 = !{i32 0, i32 256} diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll index 462ab38b4cd58d..1f7de7343efdb7 100644 --- 
a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll +++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll @@ -12,11 +12,11 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; R600: LDS_READ ; R600: LDS_READ -; OPT: call i32 @llvm.r600.read.local.size.y(), !range !0 -; OPT: call i32 @llvm.r600.read.local.size.z(), !range !0 -; OPT: call i32 @llvm.r600.read.tidig.x(), !range !1 -; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1 -; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1 +; OPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.y() +; OPT: call range(i32 0, 257) i32 @llvm.r600.read.local.size.z() +; OPT: call range(i32 0, 256) i32 @llvm.r600.read.tidig.x() +; OPT: call range(i32 0, 256) i32 @llvm.r600.read.tidig.y() +; OPT: call range(i32 0, 256) i32 @llvm.r600.read.tidig.z() define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 { entry: @@ -276,7 +276,4 @@ define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ret void } -; OPT: !0 = !{i32 0, i32 257} -; OPT: !1 = !{i32 0, i32 256} - attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="1,256" } diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll index ada1b841cd67c3..778fe904382f2f 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-strip-abi-opt-attributes.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: define amdgpu_kernel void @promote_to_lds(ptr addrspace(1) %out, i32 %in) #0 { ; CHECK: call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK: call i32 @llvm.amdgcn.workitem.id.x(), !range !2 -; CHECK: call i32 @llvm.amdgcn.workitem.id.y(), !range !2 -; CHECK: call i32 @llvm.amdgcn.workitem.id.z(), !range !2 +; CHECK: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; 
CHECK: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK: call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() define amdgpu_kernel void @promote_to_lds(ptr addrspace(1) %out, i32 %in) #0 { entry: %tmp = alloca [2 x i32], addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll index efc11bf1a606da..f56656270c335d 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll @@ -8,22 +8,22 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer( ; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !invariant.load !0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4, !range [[RNG1:![0-9]+]], !invariant.load !0 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2:![0-9]+]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) 
@lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP15]] -; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]] -; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 [[TMP14]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[A:%.*]] +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[B:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) [[PTR0]], [[PTR1]] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4 @@ -50,21 +50,21 @@ define amdgpu_kernel void 
@lds_promoted_alloca_icmp_same_derived_pointer(ptr add define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs( ; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !invariant.load !0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP15]] -; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load 
[[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 [[TMP14]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP15]], i32 0, i32 [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) [[PTR0]], null ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4 @@ -89,21 +89,21 @@ define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(ptr addrspace(1) %o define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs( ; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !invariant.load !0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4, !range [[RNG1]], !invariant.load !0 -; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP4]], 16 -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG2]] 
-; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.workitem.id.y(), !range [[RNG2]] -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.workitem.id.z(), !range [[RNG2]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw nsw i32 [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP15]] -; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr addrspace(3) [[TMP16]], i32 0, i32 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 256) i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [256 x [16 x i32]], ptr addrspace(3) @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 [[TMP14]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [16 x i32], ptr 
addrspace(3) [[TMP15]], i32 0, i32 [[A:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr addrspace(3) null, [[PTR0]] ; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32 ; CHECK-NEXT: store volatile i32 [[ZEXT]], ptr addrspace(1) [[OUT:%.*]], align 4