Skip to content

Commit

Permalink
Merge branch 'develop' into nx_delay_rank
Browse files Browse the repository at this point in the history
  • Loading branch information
ye-luo authored Jun 3, 2021
2 parents c7aa84f + 6ab5543 commit 20254e2
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 7 deletions.
7 changes: 7 additions & 0 deletions CMake/PGICompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ ADD_DEFINITIONS( -Drestrict=__restrict__ )
# Base language flags: C gets only a trailing separator, C++ additionally maps
# the __forceinline keyword onto plain "inline".
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__forceinline=inline")

# Silence noisy compiler diagnostics and print the diagnostic number with each
# remaining message. Suppressed numbers:
#   177  variable "XX" was declared but never referenced          (C and C++)
#   550  variable "XX" was set but never used                     (C and C++)
#   612  overloaded virtual function "AA" is only partially
#        overridden in class "BB"                                 (C++ only)
#   998  function "AA" is hidden by "BB" -- virtual function
#        override intended?                                       (C++ only)
set(_pgi_common_diag_flags "--display_error_number --diag_suppress 177 --diag_suppress 550")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_pgi_common_diag_flags}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_pgi_common_diag_flags} --diag_suppress 612 --diag_suppress 998")
# The helper is only needed to build the two strings above.
unset(_pgi_common_diag_flags)

# Set extra optimization specific flags
SET( CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -fast" )
Expand Down
2 changes: 1 addition & 1 deletion src/Particle/SoaDistanceTableABOMPTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ class SoaDistanceTableABOMPTarget : public DTD_BConds<T, D, SC>, public Distance
const size_t total_targets = count_targets;

const int N_sources_padded = getAlignedSize<T>(N_sources);
const int stride_size = N_sources_padded * (D + 1);

#ifndef NDEBUG
const int stride_size = N_sources_padded * (D + 1);
count_targets = 0;
for (size_t iw = 0; iw < dt_list.size(); iw++)
{
Expand Down
2 changes: 1 addition & 1 deletion src/Platforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ SUBDIRS(CPU)
TARGET_LINK_LIBRARIES(platform_device PUBLIC platform_cpu)

SUBDIRS(OMPTarget)
TARGET_LINK_LIBRARIES(platform_device PRIVATE platform_omptarget)
TARGET_LINK_LIBRARIES(platform_device PUBLIC platform_omptarget)

IF(QMC_CUDA)
SUBDIRS(CUDA_legacy)
Expand Down
34 changes: 32 additions & 2 deletions src/Platforms/OMPTarget/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#// This file is distributed under the University of Illinois/NCSA Open Source License.
#// See LICENSE file in top directory for details.
#//
#// Copyright (c) 2020 QMCPACK developers.
#// Copyright (c) 2021 QMCPACK developers.
#//
#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
#//
Expand All @@ -22,5 +22,35 @@ ELSE()
ENDIF()

IF(NOT QMC_COMPLEX)
TARGET_COMPILE_DEFINITIONS(platform_omptarget PRIVATE -DOPENMP_NO_COMPLEX)
TARGET_COMPILE_DEFINITIONS(platform_omptarget PUBLIC OPENMP_NO_COMPLEX)
else()
# Probe how the C++ compiler handles OpenMP reductions on std::complex.
#   1. Compile the probe source as-is: it declares a user-defined reduction
#      (UDR) with "omp declare reduction".
#   2. If that fails, compile the same source with OPENMP_NO_UDR defined, which
#      removes the UDR declaration so the reduction clause must be handled
#      implicitly by the compiler. On success OPENMP_NO_UDR is exported to
#      everything that links platform_omptarget; if this probe fails as well,
#      configuration is aborted.
# The compiler output of a failed probe is saved under ${CMAKE_BINARY_DIR}.
#
# try_compile() already builds the probe with the calling project's
# CMAKE_CXX_FLAGS, so they are not forwarded through CMAKE_FLAGS: that keyword
# only accepts -DVAR:TYPE=VALUE cache definitions for the test project.
try_compile(OMP_UDR_COMPLEX_OKAY
            "${CMAKE_CURRENT_BINARY_DIR}"
            "${CMAKE_CURRENT_SOURCE_DIR}/test_user_defined_reduction_complex.cpp"
            OUTPUT_VARIABLE COMPILE_OUTPUT)

if(OMP_UDR_COMPLEX_OKAY)
  message(STATUS "OpenMP user-defined reduction functionality compilation check pass")
else()
  set(COMPILE_FAIL_OUTPUT omp_UDR_complex_fail.txt)
  file(WRITE "${CMAKE_BINARY_DIR}/${COMPILE_FAIL_OUTPUT}" "${COMPILE_OUTPUT}")
  # A single string is used because message() joins multiple arguments without
  # any separator.
  message(STATUS "OpenMP user-defined reduction functionality compilation check failed! "
                 "See compiler output at ${CMAKE_BINARY_DIR}/${COMPILE_FAIL_OUTPUT}")

  # Fall back to the implicit reduction on complex types.
  try_compile(OMP_IMPLICIT_REDUCTION_COMPLEX_OKAY
              "${CMAKE_CURRENT_BINARY_DIR}"
              "${CMAKE_CURRENT_SOURCE_DIR}/test_user_defined_reduction_complex.cpp"
              COMPILE_DEFINITIONS "-DOPENMP_NO_UDR"
              OUTPUT_VARIABLE COMPILE_OUTPUT)

  if(OMP_IMPLICIT_REDUCTION_COMPLEX_OKAY)
    target_compile_definitions(platform_omptarget PUBLIC OPENMP_NO_UDR)
    message(STATUS "OpenMP implicit reduction functionality compilation check pass")
  else()
    set(COMPILE_FAIL_OUTPUT omp_implicit_reduction_complex_fail.txt)
    file(WRITE "${CMAKE_BINARY_DIR}/${COMPILE_FAIL_OUTPUT}" "${COMPILE_OUTPUT}")
    message(FATAL_ERROR "OpenMP implicit reduction on complex functionality compilation check failed! "
                        "See compiler output at ${CMAKE_BINARY_DIR}/${COMPILE_FAIL_OUTPUT}")
  endif()
endif()
ENDIF()
2 changes: 2 additions & 0 deletions src/Platforms/OMPTarget/ompReduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
#include <complex>
#include "config.h"

// Declare "+" reductions for single- and double-precision std::complex.
// The declarations are skipped as soon as either OPENMP_NO_COMPLEX or
// OPENMP_NO_UDR is defined.
#if !(defined(OPENMP_NO_COMPLEX) || defined(OPENMP_NO_UDR))
PRAGMA_OFFLOAD("omp declare reduction(+: std::complex<float>: omp_out += omp_in)")
PRAGMA_OFFLOAD("omp declare reduction(+: std::complex<double>: omp_out += omp_in)")
#endif

#endif // QMCPLUSPLUS_OMPREDUCTION_H
25 changes: 25 additions & 0 deletions src/Platforms/OMPTarget/test_user_defined_reduction_complex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include <vector>
#include <complex>
#include <cassert>

// Compiler-capability probe for OpenMP "+" reductions on std::complex<float>.
// Unless OPENMP_NO_UDR is defined, a user-defined reduction is declared for the
// type; with OPENMP_NO_UDR defined the reduction clause in main() has to be
// handled by the compiler without that declaration.
#if !defined(OPENMP_NO_UDR)
#pragma omp declare reduction(+: std::complex<float>: omp_out += omp_in)
#endif

// Sums the values 0..N-1, stored as complex numbers with zero imaginary part,
// inside an OpenMP parallel loop and checks the result.
int main()
{
const int N = 100;
std::vector<std::complex<float>> array(N);

// Fill through a raw pointer: element i holds (i, 0).
auto array_ptr = array.data();
for (int i = 0; i < N; i++)
array_ptr[i] = std::complex<float>(i);

// std::complex's default constructor yields (0, 0), so the accumulator starts at zero.
std::complex<float> sum;
// The reduction clause on a complex variable is the construct being probed.
#pragma omp parallel for reduction(+: sum)
for (int i = 0; i < N; i++)
sum += array_ptr[i];

// 0 + 1 + ... + 99 = 4950; every term is an integer small enough to be exact in float.
// NOTE: assert() expands to nothing when NDEBUG is defined.
assert(std::real(sum) == 4950);
assert(std::imag(sum) == 0);
}
1 change: 0 additions & 1 deletion src/QMCHamiltonians/NonLocalECPComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ void NonLocalECPComponent::mw_evaluateOne(const RefVectorWithLeader<NonLocalECPC
for (size_t i = 0; i < p_list.size(); i++)
{
NonLocalECPComponent& component(ecp_component_list[i]);
auto* VP = component.VP;
ParticleSet& W(p_list[i]);
TrialWaveFunction& psi(psi_list[i]);
const NLPPJob<RealType>& job = joblist[i];
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ struct HybridRepSetReader : public SplineSetReader<SA>
{
auto& mycenter = centers[mygroup[idx]];
aligned_vector<double> splineData_r(spline_npoints);
UBspline_1d_d* atomic_spline_r;
UBspline_1d_d* atomic_spline_r = nullptr;
for (size_t ip = 0; ip < spline_npoints; ip++)
splineData_r[ip] = all_vals[idx][ip][lm];
atomic_spline_r = einspline::create(atomic_spline_r, 0.0, spline_radius, spline_npoints, splineData_r.data(),
Expand All @@ -547,7 +547,7 @@ struct HybridRepSetReader : public SplineSetReader<SA>
else
{
aligned_vector<double> splineData_i(spline_npoints);
UBspline_1d_d* atomic_spline_i;
UBspline_1d_d* atomic_spline_i = nullptr;
for (size_t ip = 0; ip < spline_npoints; ip++)
splineData_i[ip] = all_vals[idx][ip][lm + lm_tot];
atomic_spline_i = einspline::create(atomic_spline_i, 0.0, spline_radius, spline_npoints,
Expand Down

0 comments on commit 20254e2

Please sign in to comment.