Skip to content

Commit

Permalink
[flang][OpenMP] Add support for target ... private
Browse files Browse the repository at this point in the history
Adds support for the `private` clause in the `target` directive. In
order to support that, the `DataSharingProcessor` was also restructured
in order to separate the collection of prviate symbols from their actual
privatization code-gen.

The commit adds both a code-gen and an offloading tests.
  • Loading branch information
ergawy committed Jan 22, 2024
1 parent fa6025e commit a57b8d3
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 23 deletions.
81 changes: 58 additions & 23 deletions flang/lib/Lower/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,17 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
//===----------------------------------------------------------------------===//

class DataSharingProcessor {
using SymbolSet = llvm::SetVector<const Fortran::semantics::Symbol *>;

bool hasLastPrivateOp;
mlir::OpBuilder::InsertPoint lastPrivIP;
mlir::OpBuilder::InsertPoint insPt;
mlir::Value loopIV;
// Symbols in private, firstprivate, and/or lastprivate clauses.
llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInNestedRegions;
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInParentRegions;
SymbolSet privatizedSymbols;
SymbolSet defaultSymbols;
SymbolSet symbolsInNestedRegions;
SymbolSet symbolsInParentRegions;
Fortran::lower::AbstractConverter &converter;
fir::FirOpBuilder &firOpBuilder;
const Fortran::parser::OmpClauseList &opClauseList;
Expand Down Expand Up @@ -182,35 +184,54 @@ class DataSharingProcessor {
: hasLastPrivateOp(false), converter(converter),
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
eval(eval) {}
// Privatisation is split into two steps.
// Step1 performs cloning of all privatisation clauses and copying for
// firstprivates. Step1 is performed at the place where process/processStep1
// Privatisation is split into 3 steps:
//
// * Step1: collects all symbols that should be privatized.
//
// * Step2: performs cloning of all privatisation clauses and copying for
// firstprivates. Step2 is performed at the place where process/processStep2
// is called. This is usually inside the Operation corresponding to the OpenMP
// construct, for looping constructs this is just before the Operation. The
// split into two steps was performed basically to be able to call
// privatisation for looping constructs before the operation is created since
// the bounds of the MLIR OpenMP operation can be privatised.
// Step2 performs the copying for lastprivates and requires knowledge of the
// MLIR operation to insert the last private update. Step2 adds
// construct, for looping constructs this is just before the Operation.
//
// * Step3: performs the copying for lastprivates and requires knowledge of
// the MLIR operation to insert the last private update. Step3 adds
// dealocation code as well.
//
// The split was performed for the following reasons:
//
// 1. Step1 was split so that the `target` op knows which symbols should not
// be mapped into the target region due to being `private`. The implicit
// mapping happens before the op body is generated so we need to to collect
// the private symbols first and then later in the body actually privatize
// them.
//
// 2. Step2 was split in order to call privatisation for looping constructs
// before the operation is created since the bounds of the MLIR OpenMP
// operation can be privatised.
void processStep1();
void processStep2(mlir::Operation *op, bool isLoop);
void processStep2();
void processStep3(mlir::Operation *op, bool isLoop);

void setLoopIV(mlir::Value iv) {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}

const SymbolSet &getPrivatizedSymbols() const { return privatizedSymbols; }
};

void DataSharingProcessor::processStep1() {
collectSymbolsForPrivatization();
collectDefaultSymbols();
}

void DataSharingProcessor::processStep2() {
privatize();
defaultPrivatize();
insertBarrier();
}

void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
insPt = firOpBuilder.saveInsertionPoint();
copyLastPrivatize(op);
firOpBuilder.restoreInsertionPoint(insPt);
Expand Down Expand Up @@ -2306,11 +2327,12 @@ static void createBodyOfOp(
if (!dsp) {
DataSharingProcessor proc(converter, *clauses, eval);
proc.processStep1();
proc.processStep2(op, isLoop);
proc.processStep2();
proc.processStep3(op, isLoop);
} else {
if (isLoop && args.size() > 0)
dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
dsp->processStep2(op, isLoop);
dsp->processStep3(op, isLoop);
}

if (storeOp)
Expand Down Expand Up @@ -2648,7 +2670,9 @@ static void genBodyOfTargetOp(
const llvm::SmallVector<mlir::Type> &mapSymTypes,
const llvm::SmallVector<mlir::Location> &mapSymLocs,
const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
const mlir::Location &currentLocation) {
const mlir::Location &currentLocation,
const Fortran::parser::OmpClauseList &clauseList,
DataSharingProcessor &dsp) {
assert(mapSymTypes.size() == mapSymLocs.size());

fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
Expand All @@ -2657,6 +2681,8 @@ static void genBodyOfTargetOp(
auto *regionBlock =
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);

dsp.processStep2();

// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
Expand Down Expand Up @@ -2811,8 +2837,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
cp.processNowait(nowaitAttr);
cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
cp.processTODO<Fortran::parser::OmpClause::Private,
Fortran::parser::OmpClause::Depend,
cp.processTODO<Fortran::parser::OmpClause::Depend,
Fortran::parser::OmpClause::Firstprivate,
Fortran::parser::OmpClause::IsDevicePtr,
Fortran::parser::OmpClause::HasDeviceAddr,
Expand All @@ -2823,11 +2848,19 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
Fortran::parser::OmpClause::Defaultmap>(
currentLocation, llvm::omp::Directive::OMPD_target);

DataSharingProcessor dsp(converter, clauseList, eval);
dsp.processStep1();

// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all the
// symbols used inside the region that have not been explicitly mapped using
// the map clause.
// symbols used inside the region that do not have explicit data-environment
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
// clauses).
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
if (dsp.getPrivatizedSymbols().contains(&sym)) {
return;
}

if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
mlir::Value baseOp = converter.getSymbolAddress(sym);
if (!baseOp)
Expand Down Expand Up @@ -2893,7 +2926,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
nowaitAttr, mapOperands);

genBodyOfTargetOp(converter, eval, genNested, targetOp, mapSymTypes,
mapSymLocs, mapSymbols, currentLocation);
mapSymLocs, mapSymbols, currentLocation, clauseList, dsp);

return targetOp;
}
Expand Down Expand Up @@ -3127,6 +3160,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
dsp.processStep1();
dsp.processStep2();

Fortran::lower::StatementContext stmtCtx;
mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
Expand Down Expand Up @@ -3179,6 +3213,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
DataSharingProcessor dsp(converter, beginClauseList, eval);
dsp.processStep1();
dsp.processStep2();

Fortran::lower::StatementContext stmtCtx;
mlir::Value scheduleChunkClauseOperand;
Expand Down
30 changes: 30 additions & 0 deletions flang/test/Lower/OpenMP/target_private.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
!Test data-sharing attribute clauses for the `target` directive.

!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s

!CHECK-LABEL: func.func @_QPomp_target_private()
subroutine omp_target_private
implicit none
integer :: x(1)

!$omp target private(x)
x(1) = 42
!$omp end target
!CHECK: omp.target {
!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x",
!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"}
!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1>
!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]])
!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} :
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) ->
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32
!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index
!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]])
!CHECK-SAME: : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref<i32>
!CHECK-NEXT: omp.terminator
!CHECK-NEXT: }

end subroutine omp_target_private
29 changes: 29 additions & 0 deletions openmp/libomptarget/test/offloading/fortran/target_private.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
! Basic offloading test with a target region
! REQUIRES: flang
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
! UNSUPPORTED: aarch64-unknown-linux-gnu
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
! UNSUPPORTED: x86_64-pc-linux-gnu
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO

! RUN: %libomptarget-compile-fortran-generic
! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
program target_update
implicit none
integer :: x(1)
integer :: y(1)

x(1) = 42

!$omp target private(x) map(tofrom: y)
x(1) = 84
y(1) = x(1)
!$omp end target

print *, "x =", x(1)
print *, "y =", y(1)

end program target_update
! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
! CHECK: x = 42
! CHECK: y = 84

0 comments on commit a57b8d3

Please sign in to comment.