From 4cb2a519db10f54815c8a4ccd5accbedc1cdfd07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 23 Dec 2024 21:27:34 -0800 Subject: [PATCH] Revert "Reland '[flang] Allow to pass an async id to allocate the descriptor (#118713)' and #118733" (#121029) This still cause issue for device runtime build. --- flang/include/flang/Runtime/CUDA/allocator.h | 8 +++--- flang/include/flang/Runtime/CUDA/common.h | 3 --- flang/include/flang/Runtime/allocatable.h | 6 ++--- .../flang/Runtime/allocator-registry.h | 10 +++---- flang/include/flang/Runtime/descriptor.h | 2 +- flang/lib/Lower/Allocatable.cpp | 11 +++----- .../Optimizer/Builder/Runtime/Allocatable.cpp | 9 +++---- flang/runtime/CUDA/allocatable.cpp | 2 +- flang/runtime/CUDA/allocator.cpp | 15 ++++------- flang/runtime/CUDA/descriptor.cpp | 3 +-- flang/runtime/allocatable.cpp | 10 +++---- flang/runtime/array-constructor.cpp | 8 +++--- flang/runtime/descriptor.cpp | 4 +-- flang/test/HLFIR/elemental-codegen.fir | 6 ++--- flang/test/Lower/OpenACC/acc-declare.f90 | 4 +-- flang/test/Lower/allocatable-polymorphic.f90 | 26 +++++++++---------- flang/test/Lower/allocatable-runtime.f90 | 4 +-- flang/test/Lower/allocate-mold.f90 | 4 +-- flang/test/Lower/polymorphic.f90 | 6 ++--- flang/unittests/Runtime/CUDA/Allocatable.cpp | 3 +-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 21 +-------------- flang/unittests/Runtime/CUDA/Memory.cpp | 3 +-- 22 files changed, 63 insertions(+), 105 deletions(-) diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h index b6f0e7f303176cb..4fb4c94c5e9b0a2 100644 --- a/flang/include/flang/Runtime/CUDA/allocator.h +++ b/flang/include/flang/Runtime/CUDA/allocator.h @@ -20,16 +20,16 @@ extern "C" { void RTDECL(CUFRegisterAllocator)(); } -void *CUFAllocPinned(std::size_t, std::int64_t = kCudaNoStream); +void *CUFAllocPinned(std::size_t); void CUFFreePinned(void *); -void *CUFAllocDevice(std::size_t, std::int64_t); +void *CUFAllocDevice(std::size_t); void CUFFreeDevice(void *); -void *CUFAllocManaged(std::size_t, std::int64_t = kCudaNoStream); +void *CUFAllocManaged(std::size_t); void CUFFreeManaged(void *); -void *CUFAllocUnified(std::size_t, std::int64_t = kCudaNoStream); +void *CUFAllocUnified(std::size_t); void CUFFreeUnified(void *); } // namespace Fortran::runtime::cuda diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h index 9c95f727ee67341..474f8e6578b891b 100644 --- a/flang/include/flang/Runtime/CUDA/common.h +++ b/flang/include/flang/Runtime/CUDA/common.h @@ -23,9 +23,6 @@ static constexpr unsigned kHostToDevice = 0; static constexpr unsigned kDeviceToHost = 1; static constexpr unsigned kDeviceToDevice = 2; -/// Value used for asyncId when no specific stream is specified. -static constexpr std::int64_t kCudaNoStream = -1; - #define CUDA_REPORT_IF_ERROR(expr) \ [](cudaError_t err) { \ if (err == cudaSuccess) \ diff --git a/flang/include/flang/Runtime/allocatable.h b/flang/include/flang/Runtime/allocatable.h index 121c31af963aa02..58061d9862095eb 100644 --- a/flang/include/flang/Runtime/allocatable.h +++ b/flang/include/flang/Runtime/allocatable.h @@ -94,9 +94,9 @@ int RTDECL(AllocatableCheckLengthParameter)(Descriptor &, // Successfully allocated memory is initialized if the allocatable has a // derived type, and is always initialized by AllocatableAllocateSource(). // Performs all necessary coarray synchronization and validation actions. -int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); +int RTDECL(AllocatableAllocate)(Descriptor &, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h index 4c3295edf13d9a3..29302c5d825bc9b 100644 --- a/flang/include/flang/Runtime/allocator-registry.h +++ b/flang/include/flang/Runtime/allocator-registry.h @@ -11,7 +11,6 @@ #include "flang/Common/api-attrs.h" #include "flang/Runtime/allocator-registry-consts.h" -#include #include #include @@ -19,7 +18,7 @@ namespace Fortran::runtime { -using AllocFct = void *(*)(std::size_t, std::int64_t); +using AllocFct = void *(*)(std::size_t); using FreeFct = void (*)(void *); typedef struct Allocator_t { @@ -27,11 +26,10 @@ typedef struct Allocator_t { FreeFct free{nullptr}; } Allocator_t; -static RT_API_ATTRS void *MallocWrapper( - std::size_t size, [[maybe_unused]] std::int64_t) { +#ifdef RT_DEVICE_COMPILATION +static RT_API_ATTRS void *MallocWrapper(std::size_t size) { return std::malloc(size); } -#ifdef RT_DEVICE_COMPILATION static RT_API_ATTRS void FreeWrapper(void *p) { return std::free(p); } #endif @@ -41,7 +39,7 @@ struct AllocatorRegistry { : allocators{{&MallocWrapper, &FreeWrapper}} {} #else constexpr AllocatorRegistry() { - allocators[kDefaultAllocator] = {&MallocWrapper, &std::free}; + allocators[kDefaultAllocator] = {&std::malloc, &std::free}; }; #endif RT_API_ATTRS void Register(int, Allocator_t); diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index 44e82c6a256873b..dd36fba157ca92b 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -369,7 +369,7 @@ class Descriptor { // before calling. It (re)computes the byte strides after // allocation. Does not allocate automatic components or // perform default component initialization. - RT_API_ATTRS int Allocate(std::int64_t asyncId = -1); + RT_API_ATTRS int Allocate(); RT_API_ATTRS void SetByteStrides(); // Deallocates storage; does not call FINAL subroutines or diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index f1436564aabaa2e..fb8380ac7e8c519 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -184,14 +184,9 @@ static mlir::Value genRuntimeAllocate(fir::FirOpBuilder &builder, ? fir::runtime::getRuntimeFunc(loc, builder) : fir::runtime::getRuntimeFunc(loc, builder); - llvm::SmallVector args{box.getAddr()}; - if (!box.isPointer()) - args.push_back( - builder.createIntegerConstant(loc, builder.getI64Type(), -1)); - args.push_back(errorManager.hasStat); - args.push_back(errorManager.errMsgAddr); - args.push_back(errorManager.sourceFile); - args.push_back(errorManager.sourceLine); + llvm::SmallVector args{ + box.getAddr(), errorManager.hasStat, errorManager.errMsgAddr, + errorManager.sourceFile, errorManager.sourceLine}; llvm::SmallVector operands; for (auto [fst, snd] : llvm::zip(args, callee.getFunctionType().getInputs())) operands.emplace_back(builder.createConvert(loc, snd, fst)); diff --git a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp index 28452d3b486da3c..70a88ff18cb1dae 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp @@ -76,19 +76,16 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder, mlir::func::FuncOp func{ fir::runtime::getRuntimeFunc(loc, builder)}; mlir::FunctionType fTy{func.getFunctionType()}; - mlir::Value asyncId = - builder.createIntegerConstant(loc, builder.getI64Type(), -1); mlir::Value sourceFile{fir::factory::locationToFilename(builder, loc)}; mlir::Value sourceLine{ - fir::factory::locationToLineNo(builder, loc, fTy.getInput(5))}; + fir::factory::locationToLineNo(builder, loc, fTy.getInput(4))}; if (!hasStat) hasStat = builder.createBool(loc, false); if (!errMsg) { mlir::Type boxNoneTy = fir::BoxType::get(builder.getNoneType()); errMsg = builder.create(loc, boxNoneTy).getResult(); } - llvm::SmallVector args{ - fir::runtime::createArguments(builder, loc, fTy, desc, asyncId, hasStat, - errMsg, sourceFile, sourceLine)}; + llvm::SmallVector args{fir::runtime::createArguments( + builder, loc, fTy, desc, hasStat, errMsg, sourceFile, sourceLine)}; builder.create(loc, func, args); } diff --git a/flang/runtime/CUDA/allocatable.cpp b/flang/runtime/CUDA/allocatable.cpp index 3f6f8f3d6d5de0a..9be54e8906903d2 100644 --- a/flang/runtime/CUDA/allocatable.cpp +++ b/flang/runtime/CUDA/allocatable.cpp @@ -52,7 +52,7 @@ int RTDEF(CUFAllocatableAllocate)(Descriptor &desc, int64_t stream, } // Perform the standard allocation. int stat{RTNAME(AllocatableAllocate)( - desc, stream, hasStat, errMsg, sourceFile, sourceLine)}; + desc, hasStat, errMsg, sourceFile, sourceLine)}; return stat; } diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp index d848f1811dcf3fd..85b3daf65a8ba4f 100644 --- a/flang/runtime/CUDA/allocator.cpp +++ b/flang/runtime/CUDA/allocator.cpp @@ -33,7 +33,7 @@ void RTDEF(CUFRegisterAllocator)() { } } -void *CUFAllocPinned(std::size_t sizeInBytes, std::int64_t) { +void *CUFAllocPinned(std::size_t sizeInBytes) { void *p; CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes)); return p; @@ -41,20 +41,15 @@ void *CUFAllocPinned(std::size_t sizeInBytes, std::int64_t) { void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); } -void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t stream) { +void *CUFAllocDevice(std::size_t sizeInBytes) { void *p; - if (stream >= 0) { - CUDA_REPORT_IF_ERROR( - cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)stream)); - } else { - CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes)); - } + CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes)); return p; } void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } -void *CUFAllocManaged(std::size_t sizeInBytes, std::int64_t) { +void *CUFAllocManaged(std::size_t sizeInBytes) { void *p; CUDA_REPORT_IF_ERROR( cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal)); @@ -63,7 +58,7 @@ void *CUFAllocManaged(std::size_t sizeInBytes, std::int64_t) { void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } -void *CUFAllocUnified(std::size_t sizeInBytes, std::int64_t) { +void *CUFAllocUnified(std::size_t sizeInBytes) { // Call alloc managed for the time being. return CUFAllocManaged(sizeInBytes); } diff --git a/flang/runtime/CUDA/descriptor.cpp b/flang/runtime/CUDA/descriptor.cpp index 816b1458ee52cbf..391c47e84241d44 100644 --- a/flang/runtime/CUDA/descriptor.cpp +++ b/flang/runtime/CUDA/descriptor.cpp @@ -20,8 +20,7 @@ RT_EXT_API_GROUP_BEGIN Descriptor *RTDEF(CUFAllocDescriptor)( std::size_t sizeInBytes, const char *sourceFile, int sourceLine) { - return reinterpret_cast( - CUFAllocManaged(sizeInBytes, kCudaNoStream)); + return reinterpret_cast(CUFAllocManaged(sizeInBytes)); } void RTDEF(CUFFreeDescriptor)( diff --git a/flang/runtime/allocatable.cpp b/flang/runtime/allocatable.cpp index b65cec8d51cf862..5e065f47636a899 100644 --- a/flang/runtime/allocatable.cpp +++ b/flang/runtime/allocatable.cpp @@ -133,17 +133,15 @@ void RTDEF(AllocatableApplyMold)( } } -int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId, - bool hasStat, const Descriptor *errMsg, const char *sourceFile, - int sourceLine) { +int RTDEF(AllocatableAllocate)(Descriptor &descriptor, bool hasStat, + const Descriptor *errMsg, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (!descriptor.IsAllocatable()) { return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat); } else if (descriptor.IsAllocated()) { return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat); } else { - int stat{ - ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)}; + int stat{ReturnError(terminator, descriptor.Allocate(), errMsg, hasStat)}; if (stat == StatOk) { if (const DescriptorAddendum * addendum{descriptor.Addendum()}) { if (const auto *derived{addendum->derivedType()}) { @@ -162,7 +160,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc, const Descriptor &source, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(AllocatableAllocate)( - alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)}; + alloc, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; DoFromSourceAssign(alloc, source, terminator); diff --git a/flang/runtime/array-constructor.cpp b/flang/runtime/array-constructor.cpp index 0d677d7cc63aa94..c6953167f5fb2ec 100644 --- a/flang/runtime/array-constructor.cpp +++ b/flang/runtime/array-constructor.cpp @@ -50,8 +50,8 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded( initialAllocationSize(fromElements, to.ElementBytes())}; to.GetDimension(0).SetBounds(1, allocationSize); RTNAME(AllocatableAllocate) - (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, - vector.sourceFile, vector.sourceLine); + (to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile, + vector.sourceLine); to.GetDimension(0).SetBounds(1, fromElements); vector.actualAllocationSize = allocationSize; } else { @@ -59,8 +59,8 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded( // first value: there should be no reallocation. RUNTIME_CHECK(terminator, previousToElements >= fromElements); RTNAME(AllocatableAllocate) - (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, - vector.sourceFile, vector.sourceLine); + (to, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile, + vector.sourceLine); vector.actualAllocationSize = previousToElements; } } else { diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index f43c96bed7d00dd..32f43e89dc7a36b 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -163,7 +163,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) { #endif } -RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) { +RT_API_ATTRS int Descriptor::Allocate() { std::size_t elementBytes{ElementBytes()}; if (static_cast(elementBytes) < 0) { // F'2023 7.4.4.2 p5: "If the character length parameter value evaluates @@ -175,7 +175,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) { // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. - void *p{alloc(byteSize ? byteSize : 1, asyncId)}; + void *p{alloc(byteSize ? byteSize : 1)}; if (!p) { return CFI_ERROR_MEM_ALLOCATION; } diff --git a/flang/test/HLFIR/elemental-codegen.fir b/flang/test/HLFIR/elemental-codegen.fir index 3c33bf8fca2d14f..0d5f343cb17711f 100644 --- a/flang/test/HLFIR/elemental-codegen.fir +++ b/flang/test/HLFIR/elemental-codegen.fir @@ -192,7 +192,7 @@ func.func @test_polymorphic(%arg0: !fir.class> {fir.bindc_ // CHECK: %[[VAL_35:.*]] = fir.absent !fir.box // CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref>>>> // CHECK: %[[VAL_40:.*]] = arith.constant 1 : index // CHECK: fir.do_loop %[[VAL_41:.*]] = %[[VAL_40]] to %[[EX1]] step %[[VAL_40]] unordered { @@ -276,7 +276,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class> {fir.b // CHECK: %[[VAL_36:.*]] = fir.absent !fir.box // CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref>>>> // CHECK: %[[VAL_41:.*]] = arith.constant 1 : index // CHECK: fir.do_loop %[[VAL_42:.*]] = %[[VAL_41]] to %[[VAL_3]] step %[[VAL_41]] unordered { @@ -329,7 +329,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class> {fir.b // CHECK: %[[VAL_85:.*]] = fir.absent !fir.box // CHECK: %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref>>>> // CHECK: %[[VAL_90:.*]] = arith.constant 1 : index // CHECK: fir.do_loop %[[VAL_91:.*]] = %[[VAL_90]] to %[[VAL_3]] step %[[VAL_90]] unordered { diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90 index 9fe51a8db55e3b5..0066e712fbdcce5 100644 --- a/flang/test/Lower/OpenACC/acc-declare.f90 +++ b/flang/test/Lower/OpenACC/acc-declare.f90 @@ -469,6 +469,6 @@ subroutine init() end module ! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit() -! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: fir.if -! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index fba0c12fb889c3a..bbc54754ca1ab14 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -267,7 +267,7 @@ subroutine test_allocatable() ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[P_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref>, !fir.ref, i32, i32) -> none ! CHECK: %[[P_CAST:.*]] = fir.convert %[[P_DECL]]#1 : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}> ! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#1 : (!fir.ref>>>) -> !fir.ref> @@ -276,7 +276,7 @@ subroutine test_allocatable() ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C1_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref>, !fir.ref, i32, i32) -> none ! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#1 : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}> ! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#1 : (!fir.ref>>>) -> !fir.ref> @@ -285,7 +285,7 @@ subroutine test_allocatable() ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C2_CAST]], %[[TYPE_DESC_P2_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref>, !fir.ref, i32, i32) -> none ! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#1 : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}> ! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#1 : (!fir.ref>>>>) -> !fir.ref> @@ -300,7 +300,7 @@ subroutine test_allocatable() ! CHECK: %[[C10_I64:.*]] = fir.convert %[[C10]] : (i32) -> i64 ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableSetBounds(%[[C3_CAST]], %[[C0]], %[[C1_I64]], %[[C10_I64]]) {{.*}}: (!fir.ref>, i32, i64, i64) -> none ! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#1 : (!fir.ref>>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}> ! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#1 : (!fir.ref>>>>) -> !fir.ref> @@ -316,7 +316,7 @@ subroutine test_allocatable() ! CHECK: %[[C20_I64:.*]] = fir.convert %[[C20]] : (i32) -> i64 ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableSetBounds(%[[C4_CAST]], %[[C0]], %[[C1_I64]], %[[C20_I64]]) {{.*}}: (!fir.ref>, i32, i64, i64) -> none ! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#1 : (!fir.ref>>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[C1_LOAD1:.*]] = fir.load %[[C1_DECL]]#0 : !fir.ref>>> ! CHECK: fir.dispatch "proc1"(%[[C1_LOAD1]] : !fir.class>>) @@ -390,7 +390,7 @@ subroutine test_unlimited_polymorphic_with_intrinsic_type_spec() ! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32 ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableInitIntrinsicForAllocate(%[[BOX_NONE]], %[[CAT]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref>, i32, i32, i32, i32) -> none ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[P_DECL]]#1 : (!fir.ref>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[PTR_DECL]]#1 : (!fir.ref>>) -> !fir.ref> ! CHECK: %[[CAT:.*]] = arith.constant 2 : i32 @@ -573,7 +573,7 @@ subroutine test_allocatable_up_character() ! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32 ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableInitCharacterForAllocate(%[[A_NONE]], %[[LEN]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref>, i64, i32, i32, i32) -> none ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A_DECL]]#1 : (!fir.ref>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 end module @@ -592,17 +592,17 @@ program test_alloc ! LLVM-LABEL: define void @_QMpolyPtest_allocatable() ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 1, i32 0) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 10) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM-COUNT-2: call void %{{[0-9]*}}() ! LLVM: call void @llvm.memcpy.p0.p0.i32 @@ -683,5 +683,5 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 ptrtoint (ptr getelementptr (%_QMpolyTp1, ptr null, i32 1) to i64), i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]] ! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i64 -1, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) diff --git a/flang/test/Lower/allocatable-runtime.f90 b/flang/test/Lower/allocatable-runtime.f90 index effd0a3a93b8403..3f1f8a86b7d0717 100644 --- a/flang/test/Lower/allocatable-runtime.f90 +++ b/flang/test/Lower/allocatable-runtime.f90 @@ -31,7 +31,7 @@ subroutine foo() ! CHECK: fir.call @{{.*}}AllocatableSetBounds(%[[xBoxCast2]], %c0{{.*}}, %[[xlbCast]], %[[xubCast]]) {{.*}}: (!fir.ref>, i32, i64, i64) -> none ! CHECK-DAG: %[[xBoxCast3:.*]] = fir.convert %[[xBoxAddr]] : (!fir.ref>>>) -> !fir.ref> ! CHECK-DAG: %[[sourceFile:.*]] = fir.convert %{{.*}} -> !fir.ref - ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %c-1{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 + ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! Simply check that we are emitting the right numebr of set bound for y and z. Otherwise, this is just like x. ! CHECK: fir.convert %[[yBoxAddr]] : (!fir.ref>>>) -> !fir.ref> @@ -180,4 +180,4 @@ subroutine mold_allocation() ! CHECK: %[[M_BOX_NONE:.*]] = fir.convert %[[EMBOX_M]] : (!fir.box>) -> !fir.box ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableApplyMold(%[[A_BOX_NONE]], %[[M_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref>, !fir.box, i32) -> none ! CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A]] : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/allocate-mold.f90 b/flang/test/Lower/allocate-mold.f90 index 831b26022dd464a..0cc10fc9016de14 100644 --- a/flang/test/Lower/allocate-mold.f90 +++ b/flang/test/Lower/allocate-mold.f90 @@ -16,7 +16,7 @@ subroutine scalar_mold_allocation() ! CHECK: %[[A_REF_BOX_NONE1:.*]] = fir.convert %[[A]] : (!fir.ref>>) -> !fir.ref> ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableApplyMold(%[[A_REF_BOX_NONE1]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.box, i32) -> none ! CHECK: %[[A_REF_BOX_NONE2:.*]] = fir.convert %[[A]] : (!fir.ref>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 subroutine array_scalar_mold_allocation() real, allocatable :: a(:) @@ -40,4 +40,4 @@ end subroutine array_scalar_mold_allocation ! CHECK: %[[REF_BOX_A1:.*]] = fir.convert %1 : (!fir.ref>>>) -> !fir.ref> ! CHECK: %{{.*}} = fir.call @_FortranAAllocatableSetBounds(%[[REF_BOX_A1]], {{.*}},{{.*}}, {{.*}}) fastmath : (!fir.ref>, i32, i64, i64) -> none ! CHECK: %[[REF_BOX_A2:.*]] = fir.convert %[[A]] : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index 8c212ce05a8c70d..8c40c91bc3baa92 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -1154,11 +1154,11 @@ program test ! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "test"} { ! CHECK: %[[ADDR_O:.*]] = fir.address_of(@_QFEo) : !fir.ref}>>>> ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref}>>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref}>>>> ! CHECK: %[[FIELD_INNER:.*]] = fir.field_index inner, !fir.type<_QMpolymorphic_testTouter{inner:!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>}> ! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], %[[FIELD_INNER]] : (!fir.box}>>>, !fir.field) -> !fir.ref> -! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) { +! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %9) -> (!fir.array<5x!fir.logical<4>>) { ! CHECK: %[[EMBOXED:.*]] = fir.embox %[[COORD_INNER]] : (!fir.ref>) -> !fir.class> -! CHECK: %{{.*}} = fir.call @_QMpolymorphic_testPlt(%{{.*}}, %[[EMBOXED]]) {{.*}} : (!fir.ref, !fir.class>) -> !fir.logical<4> +! CHECK: %{{.*}} = fir.call @_QMpolymorphic_testPlt(%17, %[[EMBOXED]]) {{.*}} : (!fir.ref, !fir.class>) -> !fir.logical<4> ! CHECK: } diff --git a/flang/unittests/Runtime/CUDA/Allocatable.cpp b/flang/unittests/Runtime/CUDA/Allocatable.cpp index 171ca982a04f1ca..0f7eb27789316cb 100644 --- a/flang/unittests/Runtime/CUDA/Allocatable.cpp +++ b/flang/unittests/Runtime/CUDA/Allocatable.cpp @@ -42,8 +42,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocatable) { CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes())); RTNAME(AllocatableAllocate) - (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, - __LINE__); + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); EXPECT_TRUE(a->IsAllocated()); RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__); cudaDeviceSynchronize(); diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp index 2cc49b6a63af5b0..7cb25787e7797ba 100644 --- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -35,25 +35,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) { EXPECT_FALSE(a->HasAddendum()); RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); RTNAME(AllocatableAllocate) - (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, - __LINE__); - EXPECT_TRUE(a->IsAllocated()); - RTNAME(AllocatableDeallocate) (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); - EXPECT_FALSE(a->IsAllocated()); -} - -TEST(AllocatableCUFTest, SimpleStreamDeviceAllocate) { - using Fortran::common::TypeCategory; - RTNAME(CUFRegisterAllocator)(); - // REAL(4), DEVICE, ALLOCATABLE :: a(:) - auto a{createAllocatable(TypeCategory::Real, 4)}; - a->SetAllocIdx(kDeviceAllocatorPos); - EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx()); - EXPECT_FALSE(a->HasAddendum()); - RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); - RTNAME(AllocatableAllocate) - (*a, 1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); EXPECT_TRUE(a->IsAllocated()); RTNAME(AllocatableDeallocate) (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); @@ -71,8 +53,7 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) { EXPECT_FALSE(a->HasAddendum()); RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); RTNAME(AllocatableAllocate) - (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, - __LINE__); + (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); EXPECT_TRUE(a->IsAllocated()); RTNAME(AllocatableDeallocate) (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp index 2f40915af38674c..7c8b7aa5a4d78b4 100644 --- a/flang/unittests/Runtime/CUDA/Memory.cpp +++ b/flang/unittests/Runtime/CUDA/Memory.cpp @@ -51,8 +51,7 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) { EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx()); RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10); RTNAME(AllocatableAllocate) - (*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, - __LINE__); + (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); EXPECT_TRUE(dev->IsAllocated()); // Create temp array to transfer to device.