From a63c83765d056526a32d218545c2ba97cdc5aebf Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 19 Mar 2024 15:41:54 +0000 Subject: [PATCH 1/2] Reapply "[Flang][OpenMP][Lower] NFC: Move clause processing helpers into the ClauseProcessor (#85258)" This patch contains slight modifications to the reverted PR #85258 to avoid issues with constructs containing multiple reduction clauses, uncovered by a test on the gfortran testsuite. This reverts commit 9f80444c2e669237a5c92013f1a42b91b5609012. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 59 ++++++++++++++--- flang/lib/Lower/OpenMP/ClauseProcessor.h | 15 ++--- flang/lib/Lower/OpenMP/OpenMP.cpp | 74 +++------------------- flang/lib/Lower/OpenMP/Utils.cpp | 19 ++++++ flang/lib/Lower/OpenMP/Utils.h | 3 + 5 files changed, 90 insertions(+), 80 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index ae798b5c0a58fa..73bf85933fec82 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -208,6 +208,25 @@ addUseDeviceClause(Fortran::lower::AbstractConverter &converter, useDeviceSymbols.push_back(object.id()); } +static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, + mlir::Location loc, + llvm::SmallVectorImpl &lowerBound, + llvm::SmallVectorImpl &upperBound, + llvm::SmallVectorImpl &step, + std::size_t loopVarTypeSize) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + // The types of lower bound, upper bound, and step are converted into the + // type of the loop variable if necessary. + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { + lowerBound[it] = + firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]); + upperBound[it] = + firOpBuilder.createConvert(loc, loopVarType, upperBound[it]); + step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]); + } +} + //===----------------------------------------------------------------------===// // ClauseProcessor unique clauses //===----------------------------------------------------------------------===// @@ -217,8 +236,7 @@ bool ClauseProcessor::processCollapse( llvm::SmallVectorImpl &lowerBound, llvm::SmallVectorImpl &upperBound, llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const { + llvm::SmallVectorImpl &iv) const { bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -236,7 +254,7 @@ bool ClauseProcessor::processCollapse( found = true; } - loopVarTypeSize = 0; + std::size_t loopVarTypeSize = 0; do { Fortran::lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); @@ -267,6 +285,9 @@ bool ClauseProcessor::processCollapse( &*std::next(doConstructEval->getNestedEvaluations().begin()); } while (collapseValue > 0); + convertLoopBounds(converter, currentLocation, lowerBound, upperBound, step, + loopVarTypeSize); + return found; } @@ -906,17 +927,39 @@ bool ClauseProcessor::processMap( bool ClauseProcessor::processReduction( mlir::Location currentLocation, - llvm::SmallVectorImpl &reductionVars, - llvm::SmallVectorImpl &reductionDeclSymbols, - llvm::SmallVectorImpl *reductionSymbols) - const { + llvm::SmallVectorImpl &outReductionVars, + llvm::SmallVectorImpl &outReductionTypes, + llvm::SmallVectorImpl &outReductionDeclSymbols, + llvm::SmallVectorImpl + *outReductionSymbols) const { return findRepeatableClause( [&](const omp::clause::Reduction &clause, const Fortran::parser::CharBlock &) { + // Use local lists of reductions to prevent variables from other + // already-processed reduction clauses from impacting this reduction. + // For example, the whole `reductionVars` array is queried to decide + // whether to do the reduction byref. + llvm::SmallVector reductionVars; + llvm::SmallVector reductionDeclSymbols; + llvm::SmallVector reductionSymbols; ReductionProcessor rp; rp.addDeclareReduction(currentLocation, converter, clause, reductionVars, reductionDeclSymbols, - reductionSymbols); + outReductionSymbols ? &reductionSymbols + : nullptr); + + // Copy local lists into the output. + llvm::copy(reductionVars, std::back_inserter(outReductionVars)); + llvm::copy(reductionDeclSymbols, + std::back_inserter(outReductionDeclSymbols)); + if (outReductionSymbols) + llvm::copy(reductionSymbols, + std::back_inserter(*outReductionSymbols)); + + outReductionTypes.reserve(outReductionTypes.size() + + reductionVars.size()); + llvm::transform(reductionVars, std::back_inserter(outReductionTypes), + [](mlir::Value v) { return v.getType(); }); }); } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c2db0cfc3cb7bd..c88b8dc5094497 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -56,14 +56,12 @@ class ClauseProcessor { clauses(makeList(clauses, semaCtx)) {} // 'Unique' clauses: They can appear at most once in the clause list. - bool - processCollapse(mlir::Location currentLocation, - Fortran::lower::pft::Evaluation &eval, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - llvm::SmallVectorImpl &iv, - std::size_t &loopVarTypeSize) const; + bool processCollapse( + mlir::Location currentLocation, Fortran::lower::pft::Evaluation &eval, + llvm::SmallVectorImpl &lowerBound, + llvm::SmallVectorImpl &upperBound, + llvm::SmallVectorImpl &step, + llvm::SmallVectorImpl &iv) const; bool processDefault() const; bool processDevice(Fortran::lower::StatementContext &stmtCtx, mlir::Value &result) const; @@ -126,6 +124,7 @@ class ClauseProcessor { bool processReduction(mlir::Location currentLocation, llvm::SmallVectorImpl &reductionVars, + llvm::SmallVectorImpl &reductionTypes, llvm::SmallVectorImpl &reductionDeclSymbols, llvm::SmallVectorImpl *reductionSymbols = nullptr) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 5c4caa1de57382..382bf49d35aa58 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -214,24 +214,6 @@ static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, firOpBuilder.restoreInsertionPoint(insPt); } -static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, - std::size_t loopVarTypeSize) { - // OpenMP runtime requires 32-bit or 64-bit loop variables. - loopVarTypeSize = loopVarTypeSize * 8; - if (loopVarTypeSize < 32) { - loopVarTypeSize = 32; - } else if (loopVarTypeSize > 64) { - loopVarTypeSize = 64; - mlir::emitWarning(converter.getCurrentLocation(), - "OpenMP loop iteration variable cannot have more than 64 " - "bits size and will be narrowed into 64 bits."); - } - assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && - "OpenMP loop iteration variable size must be transformed into 32-bit " - "or 64-bit"); - return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); -} - static mlir::Operation * createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, mlir::Location loc, mlir::Value indexVal, @@ -568,6 +550,7 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, mlir::omp::ClauseProcBindKindAttr procBindKindAttr; llvm::SmallVector allocateOperands, allocatorOperands, reductionVars; + llvm::SmallVector reductionTypes; llvm::SmallVector reductionDeclSymbols; llvm::SmallVector reductionSymbols; @@ -578,13 +561,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, cp.processDefault(); cp.processAllocate(allocatorOperands, allocateOperands); if (!outerCombined) - cp.processReduction(currentLocation, reductionVars, reductionDeclSymbols, - &reductionSymbols); - - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); + cp.processReduction(currentLocation, reductionVars, reductionTypes, + reductionDeclSymbols, &reductionSymbols); auto reductionCallback = [&](mlir::Operation *op) { llvm::SmallVector locs(reductionVars.size(), @@ -1468,25 +1446,6 @@ genOMP(Fortran::lower::AbstractConverter &converter, standaloneConstruct.u); } -static void convertLoopBounds(Fortran::lower::AbstractConverter &converter, - mlir::Location loc, - llvm::SmallVectorImpl &lowerBound, - llvm::SmallVectorImpl &upperBound, - llvm::SmallVectorImpl &step, - std::size_t loopVarTypeSize) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - // The types of lower bound, upper bound, and step are converted into the - // type of the loop variable if necessary. - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - for (unsigned it = 0; it < (unsigned)lowerBound.size(); it++) { - lowerBound[it] = - firOpBuilder.createConvert(loc, loopVarType, lowerBound[it]); - upperBound[it] = - firOpBuilder.createConvert(loc, loopVarType, upperBound[it]); - step[it] = firOpBuilder.createConvert(loc, loopVarType, step[it]); - } -} - static llvm::SmallVector genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, @@ -1520,7 +1479,7 @@ genLoopAndReductionVars( mlir::Location &loc, llvm::ArrayRef loopArgs, llvm::ArrayRef reductionArgs, - llvm::SmallVectorImpl &reductionTypes) { + llvm::ArrayRef reductionTypes) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector blockArgTypes; @@ -1582,16 +1541,15 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter, llvm::SmallVector lowerBound, upperBound, step, reductionVars; llvm::SmallVector alignedVars, nontemporalVars; llvm::SmallVector iv; + llvm::SmallVector reductionTypes; llvm::SmallVector reductionDeclSymbols; mlir::omp::ClauseOrderKindAttr orderClauseOperand; mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; - std::size_t loopVarTypeSize; ClauseProcessor cp(converter, semaCtx, loopOpClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); + cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv); cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols); + cp.processReduction(loc, reductionVars, reductionTypes, reductionDeclSymbols); cp.processIf(clause::If::DirectiveNameModifier::Simd, ifClauseOperand); cp.processSimdlen(simdlenClauseOperand); cp.processSafelen(safelenClauseOperand); @@ -1601,9 +1559,6 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter, Fortran::parser::OmpClause::Nontemporal, Fortran::parser::OmpClause::Order>(loc, ompDirective); - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - mlir::TypeRange resultType; auto simdLoopOp = firOpBuilder.create( loc, resultType, lowerBound, upperBound, step, alignedVars, @@ -1641,6 +1596,7 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter, llvm::SmallVector lowerBound, upperBound, step, reductionVars; llvm::SmallVector linearVars, linearStepVars; llvm::SmallVector iv; + llvm::SmallVector reductionTypes; llvm::SmallVector reductionDeclSymbols; llvm::SmallVector reductionSymbols; mlir::omp::ClauseOrderKindAttr orderClauseOperand; @@ -1648,20 +1604,15 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter, mlir::UnitAttr nowaitClauseOperand, byrefOperand, scheduleSimdClauseOperand; mlir::IntegerAttr orderedClauseOperand; mlir::omp::ScheduleModifierAttr scheduleModClauseOperand; - std::size_t loopVarTypeSize; ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv, - loopVarTypeSize); + cp.processCollapse(loc, eval, lowerBound, upperBound, step, iv); cp.processScheduleChunk(stmtCtx, scheduleChunkClauseOperand); - cp.processReduction(loc, reductionVars, reductionDeclSymbols, + cp.processReduction(loc, reductionVars, reductionTypes, reductionDeclSymbols, &reductionSymbols); cp.processTODO(loc, ompDirective); - convertLoopBounds(converter, loc, lowerBound, upperBound, step, - loopVarTypeSize); - if (ReductionProcessor::doReductionByRef(reductionVars)) byrefOperand = firOpBuilder.getUnitAttr(); @@ -1702,11 +1653,6 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter, auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(beginClauseList)); - llvm::SmallVector reductionTypes; - reductionTypes.reserve(reductionVars.size()); - llvm::transform(reductionVars, std::back_inserter(reductionTypes), - [](mlir::Value v) { return v.getType(); }); - auto ivCallback = [&](mlir::Operation *op) { return genLoopAndReductionVars(op, converter, loc, iv, reductionSymbols, reductionTypes); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index fa4a51e3384837..b9c0660aa4da8e 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -70,6 +71,24 @@ void genObjectList2(const Fortran::parser::OmpObjectList &objectList, } } +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize) { + // OpenMP runtime requires 32-bit or 64-bit loop variables. + loopVarTypeSize = loopVarTypeSize * 8; + if (loopVarTypeSize < 32) { + loopVarTypeSize = 32; + } else if (loopVarTypeSize > 64) { + loopVarTypeSize = 64; + mlir::emitWarning(converter.getCurrentLocation(), + "OpenMP loop iteration variable cannot have more than 64 " + "bits size and will be narrowed into 64 bits."); + } + assert((loopVarTypeSize == 32 || loopVarTypeSize == 64) && + "OpenMP loop iteration variable size must be transformed into 32-bit " + "or 64-bit"); + return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); +} + void gatherFuncAndVarSyms( const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause, llvm::SmallVectorImpl &symbolAndClause) { diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 3ab0823a462148..4074bf73987d5b 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -51,6 +51,9 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, mlir::omp::VariableCaptureKind mapCaptureType, mlir::Type retTy, bool isVal = false); +mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, + std::size_t loopVarTypeSize); + void gatherFuncAndVarSyms( const ObjectList &objects, mlir::omp::DeclareTargetCaptureClause clause, llvm::SmallVectorImpl &symbolAndClause); From 3913d9bb69a997673b1925985eea4b1f074738e7 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Wed, 20 Mar 2024 12:23:11 +0000 Subject: [PATCH 2/2] Add multiple reduction test --- .../Lower/OpenMP/wsloop-reduction-multi.f90 | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 new file mode 100644 index 00000000000000..9e9951c399c920 --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-reduction-multi.f90 @@ -0,0 +1,81 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: omp.declare_reduction +!CHECK-SAME: @[[MIN_RED_I32_NAME:.*]] : i32 init { +!CHECK: ^bb0(%{{.*}}: i32): +!CHECK: %[[C0_1:.*]] = arith.constant 2147483647 : i32 +!CHECK: omp.yield(%[[C0_1]] : i32) +!CHECK: } combiner { +!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): +!CHECK: %[[RES:.*]] = arith.minsi %[[ARG0]], %[[ARG1]] : i32 +!CHECK: omp.yield(%[[RES]] : i32) +!CHECK: } + +!CHECK-LABEL: omp.declare_reduction +!CHECK-SAME: @[[ADD_RED_F32_NAME:.*]] : f32 init { +!CHECK: ^bb0(%{{.*}}: f32): +!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f32 +!CHECK: omp.yield(%[[C0_1]] : f32) +!CHECK: } combiner { +!CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): +!CHECK: %[[RES:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}} : f32 +!CHECK: omp.yield(%[[RES]] : f32) +!CHECK: } + +!CHECK-LABEL: omp.declare_reduction +!CHECK-SAME: @[[ADD_RED_I32_NAME:.*]] : i32 init { +!CHECK: ^bb0(%{{.*}}: i32): +!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32 +!CHECK: omp.yield(%[[C0_1]] : i32) +!CHECK: } combiner { +!CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): +!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32 +!CHECK: omp.yield(%[[RES]] : i32) +!CHECK: } + +!CHECK-LABEL: func.func @_QPmultiple_reduction +!CHECK: %[[X_REF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_reductionEx"} +!CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X_REF]] {uniq_name = "_QFmultiple_reductionEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_reductionEy"} +!CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y_REF]] {uniq_name = "_QFmultiple_reductionEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[Z_REF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_reductionEz"} +!CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z_REF]] {uniq_name = "_QFmultiple_reductionEz"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: omp.wsloop reduction( +!CHECK-SAME: @[[ADD_RED_I32_NAME]] %[[X_DECL]]#0 -> %[[PRV_X:.+]] : !fir.ref, +!CHECK-SAME: @[[ADD_RED_F32_NAME]] %[[Y_DECL]]#0 -> %[[PRV_Y:.+]] : !fir.ref, +!CHECK-SAME: @[[MIN_RED_I32_NAME]] %[[Z_DECL]]#0 -> %[[PRV_Z:.+]] : !fir.ref) {{.*}}{ +!CHECK: %[[PRV_X_DECL:.+]]:2 = hlfir.declare %[[PRV_X]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[PRV_Y_DECL:.+]]:2 = hlfir.declare %[[PRV_Y]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[PRV_Z_DECL:.+]]:2 = hlfir.declare %[[PRV_Z]] {{.*}} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[LPRV_X:.+]] = fir.load %[[PRV_X_DECL]]#0 : !fir.ref +!CHECK: %[[RES_X:.+]] = arith.addi %[[LPRV_X]], %{{.+}} : i32 +!CHECK: hlfir.assign %[[RES_X]] to %[[PRV_X_DECL]]#0 : i32, !fir.ref +!CHECK: %[[LPRV_Y:.+]] = fir.load %[[PRV_Y_DECL]]#0 : !fir.ref +!CHECK: %[[RES_Y:.+]] = arith.addf %[[LPRV_Y]], %{{.+}} : f32 +!CHECK: hlfir.assign %[[RES_Y]] to %[[PRV_Y_DECL]]#0 : f32, !fir.ref +!CHECK: %[[LPRV_Z:.+]] = fir.load %[[PRV_Z_DECL]]#0 : !fir.ref +!CHECK: %[[RES_Z:.+]] = arith.select %{{.+}}, %[[LPRV_Z]], %{{.+}} : i32 +!CHECK: hlfir.assign %[[RES_Z]] to %[[PRV_Z_DECL]]#0 : i32, !fir.ref +!CHECK: omp.yield +!CHECK: } +!CHECK: return +subroutine multiple_reduction(v) + implicit none + integer, intent(in) :: v(:) + integer :: i + integer :: x + real :: y + integer:: z + x = 0 + y = 0.0 + z = 10 + + !$omp do reduction(+:x,y) reduction(min:z) + do i=1, 100 + x = x + v(i) + y = y + 1.5 * v(i) + z = min(z, v(i)) + end do + !$omp end do +end subroutine