-
Notifications
You must be signed in to change notification settings - Fork 12.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics
This is a long-standing problem that resurfaces once in a while [0]. There might actually be two problems, because I'm not 100% sure whether the issue underlying https://reviews.llvm.org/D115302 would be solved by this or not. In 2008 we thought intrinsics do not read/write globals passed to them: d4133ac This is not correct, given that intrinsics can synchronize threads and thereby cause memory effects to become visible. NOTE: I have not yet modified any tests; I only tried out the reproducer of #54851. Fixes: #54851 [0] https://discourse.llvm.org/t/bug-gvn-memdep-bug-in-the-presence-of-intrinsics/59402 Differential Revision: https://reviews.llvm.org/D123531
- Loading branch information
Showing
6 changed files
with
206 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s | ||
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s | ||
; | ||
; Functions w/o `nosync` attribute may communicate via memory and must be | ||
; treated conservatively. Taken from https://reviews.llvm.org/D115302. | ||
|
||
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" | ||
target triple = "nvptx64-nvidia-cuda" | ||
|
||
; Internal global in addrspace(3). Both test functions below access it through | ||
; an addrspacecast of @s to a plain i32* pointer. | ||
@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4 | ||
|
||
; @bar_sync stores its argument to @s, calls llvm.nvvm.bar.sync, and then loads | ||
; @s again. The CHECK lines require the store, the barrier call, and a load to | ||
; all be present, in that order, in the output. | ||
; CHECK-LABEL: @bar_sync | ||
; CHECK: store | ||
; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0) | ||
; CHECK: load | ||
define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr { | ||
store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 | ||
tail call void @llvm.nvvm.bar.sync(i32 0) | ||
%2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 | ||
ret i32 %2 | ||
} | ||
|
||
; The intrinsic is declared with attribute group #0 (convergent nounwind); | ||
; `nosync` is not among its attributes. | ||
declare void @llvm.nvvm.bar.sync(i32) #0 | ||
|
||
; @barrier0 has the same shape as @bar_sync but uses llvm.nvvm.barrier0: store | ||
; to @s, barrier call, load from @s. The CHECK lines require all three to be | ||
; present, in that order, in the output. | ||
; CHECK-LABEL: @barrier0 | ||
; CHECK: store | ||
; CHECK: tail call void @llvm.nvvm.barrier0() | ||
; CHECK: load | ||
define dso_local i32 @barrier0(i32 %0) local_unnamed_addr { | ||
store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 | ||
tail call void @llvm.nvvm.barrier0() | ||
%2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 | ||
ret i32 %2 | ||
} | ||
|
||
declare void @llvm.nvvm.barrier0() #0 | ||
|
||
; Attribute group used by both barrier intrinsic declarations in this file. | ||
; Note that `nosync` is not part of it. | ||
attributes #0 = { convergent nounwind } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes | ||
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s | ||
|
||
; Make sure we do not hoist the load before the intrinsic, unknown function, or | ||
; optnone function except if we know the unknown function is nosync and nocallback. | ||
|
||
; Each test function below uses its own internal global: | ||
; @G1 - test_barrier, @G2 - test_unknown, @G3 - test_optnone, | ||
; @G4 - test_unknown_annotated. | ||
@G1 = internal global i32 undef | ||
@G2 = internal global i32 undef | ||
@G3 = internal global i32 undef | ||
@G4 = internal global i32 undef | ||
|
||
; @G1 is stored only on the %init path. %check then calls the | ||
; llvm.amdgcn.s.barrier intrinsic and loads @G1. The checks expect the load to | ||
; stay in %check, immediately after the barrier call. | ||
define void @test_barrier(i1 %c) { | ||
; CHECK-LABEL: define {{[^@]+}}@test_barrier | ||
; CHECK-SAME: (i1 [[C:%.*]]) { | ||
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]] | ||
; CHECK: init: | ||
; CHECK-NEXT: store i32 0, ptr @G1, align 4 | ||
; CHECK-NEXT: br label [[CHECK]] | ||
; CHECK: check: | ||
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() | ||
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G1, align 4 | ||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 | ||
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) | ||
; CHECK-NEXT: ret void | ||
; | ||
br i1 %c, label %init, label %check | ||
init: | ||
store i32 0, ptr @G1 | ||
br label %check | ||
check: | ||
call void @llvm.amdgcn.s.barrier() | ||
%v = load i32, ptr @G1 | ||
%cmp = icmp eq i32 %v, 0 | ||
call void @llvm.assume(i1 %cmp) | ||
ret void | ||
} | ||
|
||
; Same shape as @test_barrier, but the call in %check is to @unknown, an | ||
; external function declared in this file without any function attributes. The | ||
; checks expect the load of @G2 to stay in %check, immediately after the call. | ||
define void @test_unknown(i1 %c) { | ||
; CHECK-LABEL: define {{[^@]+}}@test_unknown | ||
; CHECK-SAME: (i1 [[C:%.*]]) { | ||
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]] | ||
; CHECK: init: | ||
; CHECK-NEXT: store i32 0, ptr @G2, align 4 | ||
; CHECK-NEXT: br label [[CHECK]] | ||
; CHECK: check: | ||
; CHECK-NEXT: call void @unknown() | ||
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G2, align 4 | ||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 | ||
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) | ||
; CHECK-NEXT: ret void | ||
; | ||
br i1 %c, label %init, label %check | ||
init: | ||
store i32 0, ptr @G2 | ||
br label %check | ||
check: | ||
call void @unknown() | ||
%v = load i32, ptr @G2 | ||
%cmp = icmp eq i32 %v, 0 | ||
call void @llvm.assume(i1 %cmp) | ||
ret void | ||
} | ||
|
||
; Same shape as @test_barrier, but the call in %check is to @optnone, which is | ||
; defined in this file with the optnone, nosync, nocallback and noinline | ||
; attributes. The checks expect the load of @G3 to stay in %check, immediately | ||
; after the call. | ||
define void @test_optnone(i1 %c) { | ||
; CHECK-LABEL: define {{[^@]+}}@test_optnone | ||
; CHECK-SAME: (i1 [[C:%.*]]) { | ||
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]] | ||
; CHECK: init: | ||
; CHECK-NEXT: store i32 0, ptr @G3, align 4 | ||
; CHECK-NEXT: br label [[CHECK]] | ||
; CHECK: check: | ||
; CHECK-NEXT: call void @optnone() | ||
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G3, align 4 | ||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 | ||
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) | ||
; CHECK-NEXT: ret void | ||
; | ||
br i1 %c, label %init, label %check | ||
init: | ||
store i32 0, ptr @G3 | ||
br label %check | ||
check: | ||
call void @optnone() | ||
%v = load i32, ptr @G3 | ||
%cmp = icmp eq i32 %v, 0 | ||
call void @llvm.assume(i1 %cmp) | ||
ret void | ||
} | ||
|
||
; Callee for @test_optnone: an empty function that only returns. It carries | ||
; nosync and nocallback, but also optnone (with the accompanying noinline). | ||
; NOTE(review): presumably the optnone attribute is what makes the analysis | ||
; stay conservative for this callee - confirm against GlobalsModRef. | ||
define void @optnone() optnone nosync nocallback noinline { | ||
; CHECK: Function Attrs: nocallback noinline nosync optnone | ||
; CHECK-LABEL: define {{[^@]+}}@optnone | ||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: ret void | ||
; | ||
ret void | ||
} | ||
|
||
; Here hoisting is legal and we use it to verify it will happen. | ||
; The callee @unknown_nosync_nocallback is declared nosync and nocallback. The | ||
; checks expect the load of @G4 to move into a new critical-edge block on the | ||
; non-init path, with %check receiving the value through a phi placed ahead of | ||
; the call (0 from %init, the loaded value otherwise). | ||
define void @test_unknown_annotated(i1 %c) { | ||
; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated | ||
; CHECK-SAME: (i1 [[C:%.*]]) { | ||
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]] | ||
; CHECK: .check_crit_edge: | ||
; CHECK-NEXT: [[V_PRE:%.*]] = load i32, ptr @G4, align 4 | ||
; CHECK-NEXT: br label [[CHECK:%.*]] | ||
; CHECK: init: | ||
; CHECK-NEXT: store i32 0, ptr @G4, align 4 | ||
; CHECK-NEXT: br label [[CHECK]] | ||
; CHECK: check: | ||
; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ] | ||
; CHECK-NEXT: call void @unknown_nosync_nocallback() | ||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 | ||
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) | ||
; CHECK-NEXT: ret void | ||
; | ||
br i1 %c, label %init, label %check | ||
init: | ||
store i32 0, ptr @G4 | ||
br label %check | ||
check: | ||
call void @unknown_nosync_nocallback() | ||
%v = load i32, ptr @G4 | ||
%cmp = icmp eq i32 %v, 0 | ||
call void @llvm.assume(i1 %cmp) | ||
ret void | ||
} | ||
|
||
; External declarations used by the test functions in this file. Only | ||
; @unknown_nosync_nocallback carries nosync/nocallback; @unknown and the two | ||
; intrinsics are declared here without function attributes. | ||
declare void @unknown() | ||
declare void @unknown_nosync_nocallback() nosync nocallback | ||
declare void @llvm.amdgcn.s.barrier() | ||
declare void @llvm.assume(i1 noundef) | ||
|