llvm · giuseros · Jun 5, 2024 · Jun 5, 2024 · Jun 6, 2024 · Jun 7, 2024
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -165,7 +165,7 @@ def ROCDL_BallotOp :
   let summary = "Vote across thread group";
 
   let description = [{
-      Ballot provides a bit mask containing the 1-bit predicate value from each lane. 
+      Ballot provides a bit mask containing the 1-bit predicate value from each lane.
       The nth bit of the result contains the 1 bit contributed by the nth warp lane.
   }];
 
@@ -342,6 +342,7 @@ def ROCDL_wmma_i32_16x16x16_iu4 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu4", [1]
 //===---------------------------------------------------------------------===//
 
 def ROCDLBufferRsrc : LLVM_PointerInAddressSpace<8>;
+def ROCDLGlobalPtr: LLVM_PointerInAddressSpace<1>;
 
 def ROCDL_MakeBufferRsrcOp :
   ROCDL_IntrOp<"make.buffer.rsrc", [], [0], [Pure], 1>,
@@ -516,7 +517,7 @@ def ROCDL_RawBufferAtomicCmpSwap :
 }
 
 //===---------------------------------------------------------------------===//
-// MI-100 and MI-200 buffer atomic floating point add intrinsic
+// gfx9x global/buffer atomic floating point add intrinsics
 
 def ROCDL_RawBufferAtomicFAddOp :
   ROCDL_Op<"raw.buffer.atomic.fadd">,
@@ -534,6 +535,19 @@ def ROCDL_RawBufferAtomicFAddOp :
   let hasCustomAssemblyFormat = 1;
 }
 
+def ROCDL_GlobalAtomicFAddOp :
+  ROCDL_Op<"global.atomic.fadd">,
+  Arguments<(ins ROCDLGlobalPtr:$ptr,
+                 LLVM_Type:$vdata)>{
+  string llvmBuilder = [{
+      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
+      auto ptrType = moduleTranslation.convertType(op.getPtr().getType());
+      createIntrinsicCall(builder,
+          llvm::Intrinsic::amdgcn_global_atomic_fadd, {$ptr, $vdata}, {vdataType, ptrType, vdataType});
+  }];
+  let assemblyFormat = "operands attr-dict `:` type($vdata)";
+}
+
 //===---------------------------------------------------------------------===//
 // Buffer atomic floating point max intrinsic. GFX9 does not support fp32.
 

diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -494,6 +494,15 @@ llvm.func @rocdl.raw.buffer.atomic.f32(%rsrc : vector<4xi32>,
   llvm.return
 }
 
+// CHECK-LABEL: rocdl.global.atomic
+llvm.func @rocdl.global.atomic(%vdata0 : f32, %vdata1 : vector<2xf16>, %ptr : !llvm.ptr<1>) {
+  // CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}
+  rocdl.global.atomic.fadd %ptr, %vdata0: f32
+  // CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}})
+  rocdl.global.atomic.fadd %ptr, %vdata1: vector<2xf16>
+  llvm.return
+}
+
 llvm.func @rocdl.raw.buffer.atomic.i32(%rsrc : vector<4xi32>,
                         %offset : i32, %soffset : i32,
                         %vdata1 : i32) {