Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest #104748

Open
wants to merge 8 commits into
base: users/ivanradanov/flang-workshare
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
Expand Down Expand Up @@ -792,7 +793,8 @@ struct ElementalOpConversion
// Generate a loop nest looping around the fir.elemental shape and clone
// fir.elemental region inside the inner loop.
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
flangomp::shouldUseWorkshareLowering(elemental));
auto insPt = builder.saveInsertionPoint();
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
Expand Down
10 changes: 7 additions & 3 deletions flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Transforms/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
Expand Down Expand Up @@ -482,7 +483,8 @@ llvm::LogicalResult ElementalAssignBufferization::matchAndRewrite(
// Generate a loop nest looping around the hlfir.elemental shape and clone
// hlfir.elemental region inside the inner loop
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
flangomp::shouldUseWorkshareLowering(elemental));
builder.setInsertionPointToStart(loopNest.body);
auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
loopNest.oneBasedIndices);
Expand Down Expand Up @@ -553,7 +555,8 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto arrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
Expand Down Expand Up @@ -648,7 +651,8 @@ llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
llvm::SmallVector<mlir::Value> extents =
hlfir::getIndexExtents(loc, builder, shape);
hlfir::LoopNest loopNest =
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
flangomp::shouldUseWorkshareLowering(assign));
builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
Expand Down
57 changes: 57 additions & 0 deletions flang/test/HLFIR/bufferize-workshare.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: fir-opt --bufferize-hlfir %s | FileCheck %s

// CHECK-LABEL: func.func @simple(
// CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.array<42xi32>>) {
// CHECK: omp.parallel {
// CHECK: omp.workshare {
// CHECK: %[[VAL_1:.*]] = arith.constant 42 : index
// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
// CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
// CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_3]]) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<42xi32> {bindc_name = ".tmp.array", uniq_name = ""}
// CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<42xi32>>, !fir.heap<!fir.array<42xi32>>)
// CHECK: %[[VAL_7:.*]] = arith.constant true
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
// CHECK: omp.workshare.loop_wrapper {
// CHECK: omp.loop_nest (%[[VAL_9:.*]]) : index = (%[[VAL_8]]) to (%[[VAL_1]]) inclusive step (%[[VAL_8]]) {
// CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]]) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
// CHECK: %[[VAL_12:.*]] = arith.subi %[[VAL_11]], %[[VAL_2]] : i32
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_9]]) : (!fir.heap<!fir.array<42xi32>>, index) -> !fir.ref<i32>
// CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_13]] temporary_lhs : i32, !fir.ref<i32>
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_14:.*]] = fir.undefined tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_7]], [1 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, i1) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_6]]#0, [0 : index] : (tuple<!fir.heap<!fir.array<42xi32>>, i1>, !fir.heap<!fir.array<42xi32>>) -> tuple<!fir.heap<!fir.array<42xi32>>, i1>
// CHECK: hlfir.assign %[[VAL_6]]#0 to %[[VAL_4]]#0 : !fir.heap<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>
// CHECK: fir.freemem %[[VAL_6]]#0 : !fir.heap<!fir.array<42xi32>>
// CHECK: omp.terminator
// CHECK: }
// CHECK: omp.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// Test input for --bufferize-hlfir: an unordered hlfir.elemental that computes
// array(i) - 1 over a 42-element i32 array and is assigned back to that same
// array, all nested inside omp.parallel / omp.workshare. The expectations
// above require the elemental's loop to be emitted as an
// omp.workshare.loop_wrapper containing an omp.loop_nest.
func.func @simple(%arg: !fir.ref<!fir.array<42xi32>>) {
omp.parallel {
omp.workshare {
// Array extent (42) and the scalar subtrahend (1).
%c42 = arith.constant 42 : index
%c1_i32 = arith.constant 1 : i32
%shape = fir.shape %c42 : (index) -> !fir.shape<1>
%array:2 = hlfir.declare %arg(%shape) {uniq_name = "array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
// Element-wise expression: load array(i), subtract 1, yield the result.
%elemental = hlfir.elemental %shape unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
^bb0(%i: index):
%ref = hlfir.designate %array#0 (%i) : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
%val = fir.load %ref : !fir.ref<i32>
%sub = arith.subi %val, %c1_i32 : i32
hlfir.yield_element %sub : i32
}
// The expression value is stored back into the array it was computed from,
// then the expression is destroyed.
hlfir.assign %elemental to %array#0 : !hlfir.expr<42xi32>, !fir.ref<!fir.array<42xi32>>
hlfir.destroy %elemental : !hlfir.expr<42xi32>
omp.terminator
}
omp.terminator
}
return
}
34 changes: 34 additions & 0 deletions flang/test/Integration/OpenMP/workshare-array-array-assign.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR

! Lowering-test input: a whole-array copy between two assumed-shape integer
! arrays inside a combined parallel workshare construct. The expectations
! below look for hlfir.assign inside omp.workshare at the HLFIR level and for
! a nowait omp.wsloop followed by omp.barrier at the FIR level.
subroutine sb1(x, y)
integer :: x(:)
integer :: y(:)
!$omp parallel workshare
! Array-to-array assignment: the only statement inside the construct.
x = y
!$omp end parallel workshare
end subroutine

! HLFIR: omp.parallel {
! HLFIR: omp.workshare {
! HLFIR: hlfir.assign
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR: omp.terminator
! HLFIR: }

! FIR: omp.parallel {
! FIR: omp.wsloop nowait {
! FIR: omp.loop_nest
! FIR: }
! FIR: omp.barrier
! FIR: omp.terminator
! FIR: }
57 changes: 57 additions & 0 deletions flang/test/Integration/OpenMP/workshare-axpy.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR

! Lowering-test input: an element-wise z = a * x + y on assumed-shape integer
! arrays, with scalar a, inside a combined parallel workshare construct. The
! HLFIR expectations below look for two unordered hlfir.elemental ops feeding
! one hlfir.assign.
subroutine sb1(a, x, y, z)
integer :: a
integer :: x(:)
integer :: y(:)
integer :: z(:)
!$omp parallel workshare
! Scalar-times-array plus array: the only statement inside the construct.
z = a * x + y
!$omp end parallel workshare
end subroutine

! HLFIR: func.func @_QPsb1
! HLFIR: omp.parallel {
! HLFIR: omp.workshare {
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
! HLFIR: hlfir.elemental {{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
! HLFIR: hlfir.assign
! HLFIR: hlfir.destroy
! HLFIR: hlfir.destroy
! HLFIR-NOT: omp.barrier
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR-NOT: omp.barrier
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR: return
! HLFIR: }
! HLFIR:}


! FIR: func.func private @_workshare_copy_heap_Uxi32(%{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>, %{{[a-z0-9]+}}: !fir.ref<!fir.heap<!fir.array<?xi32>>>
! FIR: func.func private @_workshare_copy_i32(%{{[a-z0-9]+}}: !fir.ref<i32>, %{{[a-z0-9]+}}: !fir.ref<i32>

! FIR: func.func @_QPsb1
! FIR: omp.parallel {
! FIR: omp.single copyprivate(%{{[a-z0-9]+}} -> @_workshare_copy_i32 : !fir.ref<i32>, %{{[a-z0-9]+}} -> @_workshare_copy_heap_Uxi32 : !fir.ref<!fir.heap<!fir.array<?xi32>>>) {
! FIR: fir.allocmem
! FIR: omp.wsloop {
! FIR: omp.loop_nest
! FIR: omp.single nowait {
! FIR: fir.call @_FortranAAssign
! FIR: fir.freemem
! FIR: omp.terminator
! FIR: }
! FIR: omp.barrier
! FIR: omp.terminator
! FIR: }
45 changes: 45 additions & 0 deletions flang/test/Integration/OpenMP/workshare-scalar-array-assign.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR

! Lowering-test input: assigns the integer scalar a to every element of the
! assumed-shape array x inside a combined parallel workshare construct. The
! FIR expectations below look for the scalar being loaded inside an omp.single
! with copyprivate, reloaded afterwards, and stored in a nowait omp.wsloop.
subroutine sb1(a, x)
integer :: a
integer :: x(:)
!$omp parallel workshare
! Scalar-to-array assignment: the only statement inside the construct.
x = a
!$omp end parallel workshare
end subroutine

! HLFIR: omp.parallel {
! HLFIR: omp.workshare {
! HLFIR: %[[SCALAR:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! HLFIR: hlfir.assign %[[SCALAR]] to
! HLFIR: omp.terminator
! HLFIR: }
! HLFIR: omp.terminator
! HLFIR: }

! FIR: omp.parallel {
! FIR: %[[SCALAR_ALLOCA:.*]] = fir.alloca i32
! FIR: omp.single copyprivate(%[[SCALAR_ALLOCA]] -> @_workshare_copy_i32 : !fir.ref<i32>) {
! FIR: %[[SCALAR_LOAD:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! FIR: fir.store %[[SCALAR_LOAD]] to %[[SCALAR_ALLOCA]] : !fir.ref<i32>
! FIR: omp.terminator
! FIR: }
! FIR: %[[SCALAR_RELOAD:.*]] = fir.load %[[SCALAR_ALLOCA]] : !fir.ref<i32>
! FIR: %[[DIMS:.*]]:3 = fir.box_dims %{{.*}}, %{{.*}} : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
! FIR: omp.wsloop nowait {
! FIR: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%[[DIMS]]#1) inclusive step (%{{.*}}) {
! FIR: fir.store %[[SCALAR_RELOAD]]
! FIR: omp.yield
! FIR: }
! FIR: }
! FIR: omp.barrier
! FIR: omp.terminator
65 changes: 65 additions & 0 deletions flang/test/Integration/OpenMP/workshare-scalar-array-mul.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
!===----------------------------------------------------------------------===!
! This directory can be used to add Integration tests involving multiple
! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
! contain executable tests. We should only add tests here sparingly and only
! if there is no other way to test. Repeat this message in each test that is
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix HLFIR-O3
!RUN: %flang_fc1 -emit-fir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix FIR-O3

!RUN: %flang_fc1 -emit-hlfir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix HLFIR-O0
!RUN: %flang_fc1 -emit-fir -fopenmp -O0 %s -o - | FileCheck %s --check-prefix FIR-O0

! Lowering-test input: multiplies a fixed-size real array by 2 in place inside
! a combined parallel workshare construct. arr_01 is never initialized; the
! commands at the top of this file only emit and inspect IR, at -O3 and -O0.
program test
real :: arr_01(10)
!$omp parallel workshare
! In-place element-wise scaling: the only statement inside the construct.
arr_01 = arr_01*2
!$omp end parallel workshare
end program

! HLFIR-O3: omp.parallel {
! HLFIR-O3: omp.workshare {
! HLFIR-O3: hlfir.elemental
! HLFIR-O3: hlfir.assign
! HLFIR-O3: hlfir.destroy
! HLFIR-O3: omp.terminator
! HLFIR-O3: omp.terminator

! FIR-O3: omp.parallel {
! FIR-O3: omp.wsloop nowait {
! FIR-O3: omp.loop_nest
! FIR-O3: omp.barrier
! FIR-O3: omp.terminator

! HLFIR-O0: omp.parallel {
! HLFIR-O0: omp.workshare {
! HLFIR-O0: hlfir.elemental
! HLFIR-O0: hlfir.assign
! HLFIR-O0: hlfir.destroy
! HLFIR-O0: omp.terminator
! HLFIR-O0: omp.terminator

! Check the copyprivate copy function
! FIR-O0: func.func private @_workshare_copy_heap_{{.*}}(%[[DST:.*]]: {{.*}}, %[[SRC:.*]]: {{.*}})
! FIR-O0: fir.load %[[SRC]]
! FIR-O0: fir.store {{.*}} to %[[DST]]

! Check that we properly handle the temporary array
! FIR-O0: omp.parallel {
! FIR-O0: %[[CP:.*]] = fir.alloca !fir.heap<!fir.array<10xf32>>
! FIR-O0: omp.single copyprivate(%[[CP]] -> @_workshare_copy_heap_
! FIR-O0: fir.allocmem
! FIR-O0: fir.store
! FIR-O0: omp.terminator
! FIR-O0: fir.load %[[CP]]
! FIR-O0: omp.wsloop {
! FIR-O0: omp.loop_nest
! FIR-O0: omp.yield
! FIR-O0: omp.single nowait {
! FIR-O0: fir.call @_FortranAAssign
! FIR-O0: fir.freemem
! FIR-O0: omp.terminator
! FIR-O0: omp.barrier
! FIR-O0: omp.terminator
Loading