[SystemZ] Properly support 16 byte atomic int/fp types and ops. (#73134)
- Clang FE now has MaxAtomicPromoteWidth / MaxAtomicInlineWidth set to 128, and produces IR
  instead of calls to __atomic intrinsics for 16 bytes as well (a minimal sketch of the effect
  follows after this list).
- Atomic __int128 (and long double) variables are now aligned to 16 bytes by default (like gcc 14).
- The AtomicExpand pass now expands 16 byte operations as well.
- Tests added for the __atomic builtins for all integer widths, and for __atomic_is_lock_free and friends.
- TODO: With this patch, the AtomicExpand pass handles expansion of i128 atomicrmw operations. As a
  next step, smaller integer types should also be handled this way instead of by the backend.
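
A minimal sketch of the FE change (hypothetical example, not part of this commit): with
MaxAtomicInlineWidth at 128 and the new 16-byte default alignment, a 16-byte atomic load
is now emitted as inline IR instead of a libcall such as __atomic_load_16.

  // clang -target s390x-linux-gnu -O1 -S -emit-llvm example.c
  _Atomic __int128 X;   // now 16-byte aligned by default

  __int128 get(void) {
    return X;           // lowers to: load atomic i128, ptr @X seq_cst, align 16
  }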
JonPsson1 authored Dec 5, 2023
1 parent a7993fa commit c568927
Showing 11 changed files with 2,011 additions and 62 deletions.
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/SystemZ.h
@@ -60,7 +60,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
resetDataLayout("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
"-v128:64-a:8:16-n32:64");
}
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;
HasStrictFP = true;
}

98 changes: 98 additions & 0 deletions clang/test/CodeGen/SystemZ/atomic_is_lock_free.c
@@ -0,0 +1,98 @@
// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
//
// Test __atomic_is_lock_free() and friends.

#include <stdatomic.h>
#include <stdint.h>

typedef __attribute__((aligned(16))) __int128 __int128_Al16;

_Atomic __int128 Int128_Atomic;
__int128_Al16 Int128_Al16;
__int128 Int128;
struct { int I[3]; } _Atomic AtomicStruct;
_Atomic long double Atomic_fp128; // Also check the alignment of this.

// Check alignments of the variables. @AtomicStruct gets padded and its size
// and alignment becomes 16. Only a power-of-2 size is considered, so 16 (not
// 12) needs to be specified with the intrinsics below.
//
// CHECK: %struct.anon = type { [3 x i32] }
// CHECK: @Int128 = {{.*}} i128 0, align 8
// CHECK: @Int128_Atomic = {{.*}} i128 0, align 16
// CHECK: @Int128_Al16 = {{.*}} i128 0, align 16
// CHECK: @AtomicStruct = {{.*}} { %struct.anon, [4 x i8] } zeroinitializer, align 16
// CHECK: @Atomic_fp128 = {{.*}} fp128 0xL00000000000000000000000000000000, align 16


// CHECK-LABEL: @fun0
// CHECK: ret i1 true
_Bool fun0() {
return __atomic_is_lock_free(16, &Int128_Atomic);
}

// CHECK-LABEL: @fun1
// CHECK: ret i1 true
_Bool fun1() {
return __atomic_always_lock_free(16, &Int128_Atomic);
}

// CHECK-LABEL: @fun2
// CHECK: ret i1 true
_Bool fun2() {
return __atomic_is_lock_free(16, &Int128_Al16);
}

// CHECK-LABEL: @fun3
// CHECK: ret i1 true
_Bool fun3() {
return __atomic_always_lock_free(16, &Int128_Al16);
}
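
// Int128 below is only 8-byte aligned, so lock-freedom cannot be proven at
// compile time: __atomic_is_lock_free falls back to a runtime library call,
// and __atomic_always_lock_free folds to false.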

// CHECK-LABEL: @fun4
// CHECK: call zeroext i1 @__atomic_is_lock_free
_Bool fun4() {
return __atomic_is_lock_free(16, &Int128);
}

// CHECK-LABEL: @fun5
// CHECK: ret i1 false
_Bool fun5() {
return __atomic_always_lock_free(16, &Int128);
}
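
// A null pointer argument means only the size is considered, with typical
// (sufficient) alignment assumed for an object of that size, so the 16-byte
// queries below fold to true.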

// CHECK-LABEL: @fun6
// CHECK: ret i1 true
_Bool fun6() {
return __atomic_is_lock_free(16, 0);
}

// CHECK-LABEL: @fun7
// CHECK: ret i1 true
_Bool fun7() {
return __atomic_always_lock_free(16, 0);
}

// CHECK-LABEL: @fun8
// CHECK: ret i1 true
_Bool fun8() {
return __atomic_is_lock_free(16, &AtomicStruct);
}

// CHECK-LABEL: @fun9
// CHECK: ret i1 true
_Bool fun9() {
return __atomic_always_lock_free(16, &AtomicStruct);
}

// CHECK-LABEL: @fun10
// CHECK: ret i1 true
_Bool fun10() {
return atomic_is_lock_free(&Int128_Atomic);
}

// CHECK-LABEL: @fun11
// CHECK: ret i1 true
_Bool fun11() {
return __c11_atomic_is_lock_free(16);
}
257 changes: 257 additions & 0 deletions clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-16Al.c
@@ -0,0 +1,257 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple s390x-linux-gnu -O1 -emit-llvm %s -o - | FileCheck %s
//
// Test GNU atomic builtins for __int128 aligned to 16 bytes, which should be
// expanded to LLVM IR by the front end.

#include <stdatomic.h>
#include <stdint.h>

__int128 Ptr __attribute__((aligned(16)));
__int128 Ret __attribute__((aligned(16)));
__int128 Val __attribute__((aligned(16)));
__int128 Exp __attribute__((aligned(16)));
__int128 Des __attribute__((aligned(16)));

// CHECK-LABEL: @f1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT: ret void
//
__int128 f1() {
return __atomic_load_n(&Ptr, memory_order_seq_cst);
}

// CHECK-LABEL: @f2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP0]], ptr @Ret, align 16
// CHECK-NEXT: store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f2() {
__atomic_load(&Ptr, &Ret, memory_order_seq_cst);
return Ret;
}

// CHECK-LABEL: @f3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16
// CHECK-NEXT: ret void
//
void f3() {
__atomic_store_n(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16
// CHECK-NEXT: store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 16
// CHECK-NEXT: ret void
//
void f4() {
__atomic_store(&Ptr, &Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f5(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f5() {
return __atomic_exchange_n(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f6(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr @Ret, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f6() {
__atomic_exchange(&Ptr, &Val, &Ret, memory_order_seq_cst);
return Ret;
}
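
// The fourth argument (weak = 0) requests a strong compare-exchange. On
// failure, the value observed in memory is stored back to @Exp, as the
// cmpxchg.store_expected block below shows.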

// CHECK-LABEL: @f7(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Des, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Exp, align 16
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
// CHECK: cmpxchg.store_expected:
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
// CHECK: cmpxchg.continue:
// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f7() {
return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
memory_order_seq_cst, memory_order_seq_cst);
}

// CHECK-LABEL: @f8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Exp, align 16
// CHECK-NEXT: [[TMP1:%.*]] = load i128, ptr @Des, align 16
// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 16
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
// CHECK-NEXT: br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
// CHECK: cmpxchg.store_expected:
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
// CHECK-NEXT: store i128 [[TMP4]], ptr @Exp, align 16
// CHECK-NEXT: br label [[CMPXCHG_CONTINUE]]
// CHECK: cmpxchg.continue:
// CHECK-NEXT: ret i1 [[TMP3]]
//
_Bool f8() {
return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
memory_order_seq_cst, memory_order_seq_cst);
}

// CHECK-LABEL: @f9(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f9() {
return __atomic_add_fetch(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f10(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f10() {
return __atomic_sub_fetch(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f11(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f11() {
return __atomic_and_fetch(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f12(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f12() {
return __atomic_xor_fetch(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f13(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f13() {
return __atomic_or_fetch(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f14(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
// CHECK-NEXT: [[TMP3:%.*]] = xor i128 [[TMP2]], -1
// CHECK-NEXT: store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f14() {
return __atomic_nand_fetch(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f15(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f15() {
return __atomic_fetch_add(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f16() {
return __atomic_fetch_sub(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f17(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f17() {
return __atomic_fetch_and(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f18(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f18() {
return __atomic_fetch_xor(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f19(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f19() {
return __atomic_fetch_or(&Ptr, Val, memory_order_seq_cst);
}

// CHECK-LABEL: @f20(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr @Val, align 16, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 16
// CHECK-NEXT: store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
__int128 f20() {
return __atomic_fetch_nand(&Ptr, Val, memory_order_seq_cst);
}
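
A note on the two nand flavors above (@f14 vs. @f20): __atomic_nand_fetch returns the new
value, so the front end re-applies the operation (the and plus xor -1) to the atomicrmw
result, while __atomic_fetch_nand returns the old value directly. A small equivalence
sketch (hypothetical helper, not from this commit):

  // Expressing the new-value form in terms of the old-value form.
  __int128 nand_fetch_via_fetch_nand(__int128 *p, __int128 v) {
    __int128 old = __atomic_fetch_nand(p, v, __ATOMIC_SEQ_CST);
    return ~(old & v);  // matches the and + xor -1 sequence in @f14
  }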