diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 7dd25f75d9eb76..35202d2a8fdc6b 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -143,15 +143,17 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, //===----------------------------------------------------------------------===// class DataSharingProcessor { + using SymbolSet = llvm::SetVector; + bool hasLastPrivateOp; mlir::OpBuilder::InsertPoint lastPrivIP; mlir::OpBuilder::InsertPoint insPt; mlir::Value loopIV; // Symbols in private, firstprivate, and/or lastprivate clauses. - llvm::SetVector privatizedSymbols; - llvm::SetVector defaultSymbols; - llvm::SetVector symbolsInNestedRegions; - llvm::SetVector symbolsInParentRegions; + SymbolSet privatizedSymbols; + SymbolSet defaultSymbols; + SymbolSet symbolsInNestedRegions; + SymbolSet symbolsInParentRegions; Fortran::lower::AbstractConverter &converter; fir::FirOpBuilder &firOpBuilder; const Fortran::parser::OmpClauseList &opClauseList; @@ -182,35 +184,54 @@ class DataSharingProcessor { : hasLastPrivateOp(false), converter(converter), firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList), eval(eval) {} - // Privatisation is split into two steps. - // Step1 performs cloning of all privatisation clauses and copying for - // firstprivates. Step1 is performed at the place where process/processStep1 + // Privatisation is split into 3 steps: + // + // * Step1: collects all symbols that should be privatized. + // + // * Step2: performs cloning of all privatisation clauses and copying for + // firstprivates. Step2 is performed at the place where process/processStep2 // is called. This is usually inside the Operation corresponding to the OpenMP - // construct, for looping constructs this is just before the Operation. The - // split into two steps was performed basically to be able to call - // privatisation for looping constructs before the operation is created since - // the bounds of the MLIR OpenMP operation can be privatised. - // Step2 performs the copying for lastprivates and requires knowledge of the - // MLIR operation to insert the last private update. Step2 adds + // construct, for looping constructs this is just before the Operation. + // + // * Step3: performs the copying for lastprivates and requires knowledge of + // the MLIR operation to insert the last private update. Step3 adds // dealocation code as well. + // + // The split was performed for the following reasons: + // + // 1. Step1 was split so that the `target` op knows which symbols should not + // be mapped into the target region due to being `private`. The implicit + // mapping happens before the op body is generated so we need to to collect + // the private symbols first and then later in the body actually privatize + // them. + // + // 2. Step2 was split in order to call privatisation for looping constructs + // before the operation is created since the bounds of the MLIR OpenMP + // operation can be privatised. void processStep1(); - void processStep2(mlir::Operation *op, bool isLoop); + void processStep2(); + void processStep3(mlir::Operation *op, bool isLoop); void setLoopIV(mlir::Value iv) { assert(!loopIV && "Loop iteration variable already set"); loopIV = iv; } + + const SymbolSet &getPrivatizedSymbols() const { return privatizedSymbols; } }; void DataSharingProcessor::processStep1() { collectSymbolsForPrivatization(); collectDefaultSymbols(); +} + +void DataSharingProcessor::processStep2() { privatize(); defaultPrivatize(); insertBarrier(); } -void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) { +void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) { insPt = firOpBuilder.saveInsertionPoint(); copyLastPrivatize(op); firOpBuilder.restoreInsertionPoint(insPt); @@ -2306,11 +2327,12 @@ static void createBodyOfOp( if (!dsp) { DataSharingProcessor proc(converter, *clauses, eval); proc.processStep1(); - proc.processStep2(op, isLoop); + proc.processStep2(); + proc.processStep3(op, isLoop); } else { if (isLoop && args.size() > 0) dsp->setLoopIV(converter.getSymbolAddress(*args[0])); - dsp->processStep2(op, isLoop); + dsp->processStep3(op, isLoop); } if (storeOp) @@ -2648,7 +2670,9 @@ static void genBodyOfTargetOp( const llvm::SmallVector &mapSymTypes, const llvm::SmallVector &mapSymLocs, const llvm::SmallVector &mapSymbols, - const mlir::Location ¤tLocation) { + const mlir::Location ¤tLocation, + const Fortran::parser::OmpClauseList &clauseList, + DataSharingProcessor &dsp) { assert(mapSymTypes.size() == mapSymLocs.size()); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -2657,6 +2681,8 @@ static void genBodyOfTargetOp( auto *regionBlock = firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); + dsp.processStep2(); + // Clones the `bounds` placing them inside the target region and returns them. auto cloneBound = [&](mlir::Value bound) { if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { @@ -2811,8 +2837,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, cp.processNowait(nowaitAttr); cp.processMap(currentLocation, directive, semanticsContext, stmtCtx, mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols); - cp.processTODO( currentLocation, llvm::omp::Directive::OMPD_target); + DataSharingProcessor dsp(converter, clauseList, eval); + dsp.processStep1(); + // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the - // symbols used inside the region that have not been explicitly mapped using - // the map clause. + // symbols used inside the region that do not have explicit data-environment + // attribute clauses (neither data-sharing; e.g. `private`, nor `map` + // clauses). auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { + if (dsp.getPrivatizedSymbols().contains(&sym)) { + return; + } + if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) @@ -2893,7 +2926,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, nowaitAttr, mapOperands); genBodyOfTargetOp(converter, eval, genNested, targetOp, mapSymTypes, - mapSymLocs, mapSymbols, currentLocation); + mapSymLocs, mapSymbols, currentLocation, clauseList, dsp); return targetOp; } @@ -3127,6 +3160,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); DataSharingProcessor dsp(converter, loopOpClauseList, eval); dsp.processStep1(); + dsp.processStep2(); Fortran::lower::StatementContext stmtCtx; mlir::Value scheduleChunkClauseOperand, ifClauseOperand; @@ -3179,6 +3213,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); DataSharingProcessor dsp(converter, beginClauseList, eval); dsp.processStep1(); + dsp.processStep2(); Fortran::lower::StatementContext stmtCtx; mlir::Value scheduleChunkClauseOperand; diff --git a/flang/test/Lower/OpenMP/target_private.f90 b/flang/test/Lower/OpenMP/target_private.f90 new file mode 100644 index 00000000000000..98e3b79d035dfd --- /dev/null +++ b/flang/test/Lower/OpenMP/target_private.f90 @@ -0,0 +1,30 @@ +!Test data-sharing attribute clauses for the `target` directive. + +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +!CHECK-LABEL: func.func @_QPomp_target_private() +subroutine omp_target_private + implicit none + integer :: x(1) + +!$omp target private(x) + x(1) = 42 +!$omp end target +!CHECK: omp.target { +!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x", +!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"} +!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1> +!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]]) +!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} : +!CHECK-SAME: (!fir.ref>, !fir.shape<1>) -> +!CHECK-SAME: (!fir.ref>, !fir.ref>) +!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32 +!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index +!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]]) +!CHECK-SAME: : (!fir.ref>, index) -> !fir.ref +!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref +!CHECK-NEXT: omp.terminator +!CHECK-NEXT: } + +end subroutine omp_target_private diff --git a/openmp/libomptarget/test/offloading/fortran/target_private.f90 b/openmp/libomptarget/test/offloading/fortran/target_private.f90 new file mode 100644 index 00000000000000..486c23ec2ec8d2 --- /dev/null +++ b/openmp/libomptarget/test/offloading/fortran/target_private.f90 @@ -0,0 +1,29 @@ +! Basic offloading test with a target region +! REQUIRES: flang +! UNSUPPORTED: nvptx64-nvidia-cuda-LTO +! UNSUPPORTED: aarch64-unknown-linux-gnu +! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +! UNSUPPORTED: x86_64-pc-linux-gnu +! UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +! RUN: %libomptarget-compile-fortran-generic +! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic +program target_update + implicit none + integer :: x(1) + integer :: y(1) + + x(1) = 42 + +!$omp target private(x) map(tofrom: y) + x(1) = 84 + y(1) = x(1) +!$omp end target + + print *, "x =", x(1) + print *, "y =", y(1) + +end program target_update +! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} +! CHECK: x = 42 +! CHECK: y = 84