Skip to content

Commit

Permalink
Merge branch 'master' into spmm_constraints
Browse files Browse the repository at this point in the history
  • Loading branch information
devreal authored Jun 4, 2024
2 parents d31e666 + 754c7d7 commit be7ff9b
Show file tree
Hide file tree
Showing 35 changed files with 721 additions and 255 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:

- name: Install prerequisite MacOS packages
if: ${{ matrix.os == 'macos-latest' }}
run: brew install ninja gcc@10 boost eigen open-mpi bison ccache
run: brew install ninja boost eigen open-mpi bison ccache

- name: Install prerequisites Ubuntu packages
if: ${{ matrix.os == 'ubuntu-22.04' }}
Expand Down
23 changes: 21 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ option(TTG_ENABLE_LEVEL_ZERO "Whether to TTG will look for Intel oneAPI Level Ze
option(TTG_EXAMPLES "Whether to build examples" OFF)
option(TTG_ENABLE_ASAN "Whether to enable address sanitizer" OFF)

option(TTG_ENABLE_COROUTINES "Whether to enable C++ coroutines, needed for accelerator device support" ON)
option(TTG_FETCH_BOOST "Whether to fetch+build Boost, if missing" OFF)
option(TTG_IGNORE_BUNDLED_EXTERNALS "Whether to skip installation and use of bundled external dependencies (Boost.CallableTraits)" OFF)
option(TTG_ENABLE_TRACE "Whether to enable ttg::trace() output" OFF)
Expand Down Expand Up @@ -94,8 +95,26 @@ endif (BUILD_TESTING)
###########################
# Boost
include("${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchBoost.cmake")
# C++ coroutines
find_package(CXXStdCoroutine MODULE REQUIRED COMPONENTS Final Experimental)

# Optional C++ coroutine support (needed for accelerator device support).
# Detection is skipped, with a warning, for GCC releases with broken coroutine
# support: anything older than 11.4.0, and 12.1.0 up to (but excluding) 12.3.0.
if (TTG_ENABLE_COROUTINES)
  set(SKIP_COROUTINE_DETECTION FALSE)
  # Pass variable *names* to if(): an unquoted ${...} expansion is a syntax
  # error when the value is empty and may be dereferenced a second time when
  # the value happens to name another variable.
  if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.4.0)
      set(SKIP_COROUTINE_DETECTION TRUE)
    elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.1.0 AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.3.0)
      set(SKIP_COROUTINE_DETECTION TRUE)
    endif()
    if (SKIP_COROUTINE_DETECTION)
      message(WARNING "GCC with broken Coroutine support detected, disabling Coroutine support. At least GCC 11.4, 12.3, or 13.1 required.")
    endif()
  endif()

  if (NOT SKIP_COROUTINE_DETECTION)
    find_package(CXXStdCoroutine MODULE REQUIRED COMPONENTS Final Experimental)
    # Store the *value* of CXXStdCoroutine_FOUND. The bare name would be stored
    # as a literal string, which is not a false constant, so a
    # `#cmakedefine TTG_HAVE_COROUTINE` would always be turned into a #define.
    # NOTE(review): this is a cache entry without FORCE, so a value written by
    # an earlier configure run is kept on later runs.
    set(TTG_HAVE_COROUTINE "${CXXStdCoroutine_FOUND}" CACHE BOOL "True if the compiler has coroutine support")
  endif()
endif()


##########################
Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/ExternalDependenciesVersions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
set(TTG_TRACKED_VG_CMAKE_KIT_TAG 7ea2d4d3f8854b9e417f297fd74d6fc49aa13fd5) # used to provide "real" FindOrFetchBoost
set(TTG_TRACKED_CATCH2_VERSION 3.5.0)
set(TTG_TRACKED_MADNESS_TAG 2eb3bcf0138127ee2dbc651f1aabd3e9b0def4e3)
set(TTG_TRACKED_PARSEC_TAG 0b3140f58ad9dc78a3d64da9fd73ecc7f443ece7)
set(TTG_TRACKED_PARSEC_TAG 58f8f3089ecad2e8ee50e80a9586e05ce8873b1c)
set(TTG_TRACKED_BTAS_TAG 4e8f5233aa7881dccdfcc37ce07128833926d3c2)
set(TTG_TRACKED_TILEDARRAY_TAG 493c109379a1b64ddd5ef59f7e33b95633b68d73)

Expand Down
1 change: 1 addition & 0 deletions cmake/modules/FindOrFetchBoost.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ if (TTG_PARSEC_USE_BOOST_SERIALIZATION)
list(APPEND optional_components
serialization
iostreams
graph
)
endif()

Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/FindOrFetchPARSEC.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if (NOT TARGET PaRSEC::parsec)

FetchContent_Declare(
PARSEC
GIT_REPOSITORY https://github.com/devreal/parsec-1.git
GIT_REPOSITORY https://github.com/ICLDisco/parsec.git
GIT_TAG ${TTG_TRACKED_PARSEC_TAG}
)
FetchContent_MakeAvailable(PARSEC)
Expand Down
8 changes: 5 additions & 3 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ add_ttg_executable(t9-streaming t9/t9_streaming.cc)
# sparse matmul need Eigen ... it's always provided by TA
if (TARGET tiledarray)
# MADworld used for MADNESS serialization
add_ttg_executable(spmm spmm/spmm.cc LINK_LIBRARIES TiledArray_Eigen)
add_ttg_executable(spmm spmm/spmm.cc LINK_LIBRARIES TiledArray_Eigen $<TARGET_NAME_IF_EXISTS:Boost::graph>
COMPILE_DEFINITIONS $<$<TARGET_EXISTS:Boost::graph>:HAVE_BOOST_GRAPH=1>)
# block-sparse needs BTAS ... it's always provided by TA
# since only need to use matrices, limit BTAS_TARGET_MAX_INDEX_RANK to 2
add_ttg_executable(bspmm spmm/spmm.cc LINK_LIBRARIES tiledarray TiledArray_Eigen BTAS COMPILE_DEFINITIONS BLOCK_SPARSE_GEMM=1;BTAS_TARGET_MAX_INDEX_RANK=2)
add_ttg_executable(bspmm spmm/spmm.cc LINK_LIBRARIES tiledarray TiledArray_Eigen BTAS
COMPILE_DEFINITIONS BLOCK_SPARSE_GEMM=1;BTAS_TARGET_MAX_INDEX_RANK=2)

add_ttg_executable(testing_dpotrf potrf/testing_dpotrf.cc LINK_LIBRARIES tiledarray lapackpp)
add_ttg_executable(testing_dtrtri potrf/testing_dtrtri.cc LINK_LIBRARIES tiledarray lapackpp)
Expand All @@ -37,7 +39,7 @@ if (TARGET tiledarray)
if (TARGET roc::hipsolver)
add_ttg_executable(testing_dpotrf_hip potrf/testing_dpotrf.cc
LINK_LIBRARIES lapackpp tiledarray roc::hipblas roc::hipsolver
COMPILE_DEFINITIONS TTG_ENABLE_HIP=1;DEBUG_TILES_VALUES=1
COMPILE_DEFINITIONS TTG_ENABLE_HIP=1 #;DEBUG_TILES_VALUES=1
RUNTIMES "parsec")
endif(TARGET roc::hipsolver)
elseif (TARGET MKL::MKL_DPCPP)
Expand Down
4 changes: 2 additions & 2 deletions examples/matrixtile.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#if defined(TILEDARRAY_HAS_DEVICE)
#define ALLOCATOR TiledArray::device_pinned_allocator<T>

inline void allocator_init() {
inline void allocator_init(int argc, char **argv) {
// initialize MADNESS so that TA allocators can be created
#if defined(TTG_PARSEC_IMPORTED)
madness::ParsecRuntime::initialize_with_existing_context(ttg::default_execution_context().impl().context());
Expand All @@ -28,7 +28,7 @@ inline void allocator_fini() {
#else // TILEDARRAY_HAS_DEVICE
#define ALLOCATOR std::allocator<T>

inline void allocator_init() { }
inline void allocator_init(int argc, char **argv) { }

inline void allocator_fini() { }

Expand Down
10 changes: 9 additions & 1 deletion examples/potrf/pmw.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ class PaRSECMatrixWrapper {
(pm->uplo == PARSEC_MATRIX_UPPER && col >= row);
}

/** Returns `grid.rows` of the wrapped matrix descriptor
 *  (presumably the number of process-grid rows; confirm against PaRSEC). */
int P() const {
  const auto& grid = pm->grid;
  return grid.rows;
}

/** Returns `grid.cols` of the wrapped matrix descriptor
 *  (presumably the number of process-grid columns; confirm against PaRSEC). */
int Q() const {
  const auto& grid = pm->grid;
  return grid.cols;
}

PaRSECMatrixT* parsec() {
return pm;
}
Expand Down Expand Up @@ -132,7 +140,7 @@ class PaRSECMatrixWrapper {
};

template<typename ValueT>
using MatrixT = PaRSECMatrixWrapper<sym_two_dim_block_cyclic_t, ValueT>;
using MatrixT = PaRSECMatrixWrapper<parsec_matrix_sym_block_cyclic_t, ValueT>;

static auto make_load_tt(MatrixT<double> &A, ttg::Edge<Key2, MatrixTile<double>> &toop, bool defer_write)
{
Expand Down
26 changes: 25 additions & 1 deletion examples/potrf/potrf.h
Original file line number Diff line number Diff line change
Expand Up @@ -674,10 +674,22 @@ namespace potrf {
auto keymap1 = [&](const Key1& key) { return A.rank_of(key[0], key[0]); };

auto keymap2a = [&](const Key2& key) { return A.rank_of(key[0], key[1]); };
auto keymap2b = [&](const Key2& key) { return A.rank_of(key[0], key[0]); };
auto keymap2b = [&](const Key2& key) { return A.rank_of(key[1], key[1]); };

auto keymap3 = [&](const Key3& key) { return A.rank_of(key[0], key[1]); };

/**
* Device map hints: we try to keep tiles on one row on the same device to minimize
* data movement between devices. This provides hints for load-balancing up front
* and avoids movement of the TRSM result to GEMM tasks.
*/
auto devmap1 = [&](const Key1& key) { return (key[0] / A.P()) % ttg::device::num_devices(); };

auto devmap2a = [&](const Key2& key) { return (key[0] / A.P()) % ttg::device::num_devices(); };
auto devmap2b = [&](const Key2& key) { return (key[1] / A.P()) % ttg::device::num_devices(); };

auto devmap3 = [&](const Key3& key) { return (key[0] / A.P()) % ttg::device::num_devices(); };

ttg::Edge<Key1, MatrixTile<T>> syrk_potrf("syrk_potrf"), disp_potrf("disp_potrf");

ttg::Edge<Key2, MatrixTile<T>> potrf_trsm("potrf_trsm"), trsm_syrk("trsm_syrk"), gemm_trsm("gemm_trsm"),
Expand All @@ -692,18 +704,30 @@ namespace potrf {
// POTRF task template: keys are mapped to ranks by keymap1; the device map is
// only installed when ENABLE_DEVICE_KERNEL is defined.
auto tt_potrf = make_potrf(A, disp_potrf, syrk_potrf, potrf_trsm, output);
tt_potrf->set_keymap(keymap1);
tt_potrf->set_defer_writer(defer_write);
#ifdef ENABLE_DEVICE_KERNEL
tt_potrf->set_devicemap(devmap1);
#endif // ENABLE_DEVICE_KERNEL

// TRSM task template: rank from keymap2a, optional device hint from devmap2a.
auto tt_trsm = make_trsm(A, disp_trsm, potrf_trsm, gemm_trsm, trsm_syrk, trsm_gemm_row, trsm_gemm_col, output);
tt_trsm->set_keymap(keymap2a);
tt_trsm->set_defer_writer(defer_write);
#ifdef ENABLE_DEVICE_KERNEL
tt_trsm->set_devicemap(devmap2a);
#endif // ENABLE_DEVICE_KERNEL

// SYRK task template: rank from keymap2b, optional device hint from devmap2b.
auto tt_syrk = make_syrk(A, disp_syrk, trsm_syrk, syrk_syrk, syrk_potrf, syrk_syrk);
tt_syrk->set_keymap(keymap2b);
tt_syrk->set_defer_writer(defer_write);
#ifdef ENABLE_DEVICE_KERNEL
tt_syrk->set_devicemap(devmap2b);
#endif // ENABLE_DEVICE_KERNEL

// GEMM task template: rank from keymap3, optional device hint from devmap3.
auto tt_gemm = make_gemm(A, disp_gemm, trsm_gemm_row, trsm_gemm_col, gemm_gemm, gemm_trsm, gemm_gemm);
tt_gemm->set_keymap(keymap3);
tt_gemm->set_defer_writer(defer_write);
#ifdef ENABLE_DEVICE_KERNEL
tt_gemm->set_devicemap(devmap3);
#endif // ENABLE_DEVICE_KERNEL

/* Priorities taken from DPLASMA */
auto nt = A.cols();
Expand Down
2 changes: 1 addition & 1 deletion examples/potrf/testing_dlauum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ int main(int argc, char **argv)
ttg::initialize(argc, argv, nthreads);

/* set up TA to get the allocator */
allocator_init();
allocator_init(argc, argv);

auto world = ttg::default_execution_context();

Expand Down
2 changes: 1 addition & 1 deletion examples/potrf/testing_dpoinv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ int main(int argc, char **argv)
delete[] ttg_argv;

/* set up TA to get the allocator */
allocator_init();
allocator_init(argc, argv);

ttg::trace_on();

Expand Down
2 changes: 1 addition & 1 deletion examples/potrf/testing_dpotrf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int main(int argc, char **argv)
ttg::initialize(1, argv, nthreads);

/* set up TA to get the allocator */
allocator_init();
allocator_init(argc, argv);

auto world = ttg::default_execution_context();
if(nullptr != prof_filename) {
Expand Down
2 changes: 1 addition & 1 deletion examples/potrf/testing_dtrtri.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ int main(int argc, char **argv)
ttg::initialize(argc, argv, nthreads);

/* set up TA to get the allocator */
allocator_init();
allocator_init(argc, argv);

auto world = ttg::default_execution_context();

Expand Down
6 changes: 5 additions & 1 deletion examples/spmm/spmm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#endif

#include <sys/time.h>
#include <boost/graph/rmat_graph_generator.hpp>
#if !defined(BLOCK_SPARSE_GEMM)
#include <boost/graph/rmat_graph_generator.hpp>
#include <boost/graph/directed_graph.hpp>
#include <boost/random/linear_congruential.hpp>
#include <unsupported/Eigen/SparseExtra>
Expand Down Expand Up @@ -952,6 +952,7 @@ static void initSpMatrixMarket(const std::function<int(const Key<2> &)> &keymap,
K = (int)A.cols();
}

#ifdef HAVE_BOOST_GRAPH
static void initSpRmat(const std::function<int(const Key<2> &)> &keymap, const char *opt, SpMatrix<> &A, SpMatrix<> &B,
SpMatrix<> &C, int &M, int &N, int &K, unsigned long seed) {
int E;
Expand Down Expand Up @@ -1008,6 +1009,7 @@ static void initSpRmat(const std::function<int(const Key<2> &)> &keymap, const c
std::cout << "#R-MAT: " << E << " nonzero elements, density: " << (double)nnz / (double)N / (double)N << std::endl;
}
}
#endif // HAVE_BOOST_GRAPH

static void initSpHardCoded(const std::function<int(const Key<2> &)> &keymap, SpMatrix<> &A, SpMatrix<> &B,
SpMatrix<> &C, int &m, int &n, int &k) {
Expand Down Expand Up @@ -1650,10 +1652,12 @@ int main(int argc, char **argv) {
char *filename = getCmdOption(argv, argv + argc, "-mm");
tiling_type = filename;
initSpMatrixMarket(ij_keymap, filename, A, B, C, M, N, K);
#ifdef HAVE_BOOST_GRAPH
} else if (cmdOptionExists(argv, argv + argc, "-rmat")) {
char *opt = getCmdOption(argv, argv + argc, "-rmat");
tiling_type = "RandomSparseMatrix";
initSpRmat(ij_keymap, opt, A, B, C, M, N, K, seed);
#endif // HAVE_BOOST_GRAPH
} else {
tiling_type = "HardCodedSparseMatrix";
initSpHardCoded(ij_keymap, A, B, C, M, N, K);
Expand Down
18 changes: 10 additions & 8 deletions tests/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@ set(ut_src
ranges.cc
tt.cc
unit_main.cpp
streams.cc
streams.cc
constraints.cc
)
set(ut_libs Catch2::Catch2)

# coroutine tests
# we definitely have TARGET std::coroutine
list(APPEND ut_src fibonacci-coro.cc)
list(APPEND ut_src device_coro.cc)
list(APPEND ut_src constraints.cc)
if (TTG_HAVE_CUDA)
list(APPEND ut_src cuda_kernel.cu)
endif(TTG_HAVE_CUDA)
list(APPEND ut_libs std::coroutine)
# Coroutine-based unit tests are only added when the CXXStdCoroutine package
# was found; the CUDA kernel source additionally requires TTG_HAVE_CUDA.
if (CXXStdCoroutine_FOUND)
  list(APPEND ut_src
    fibonacci-coro.cc
    device_coro.cc
  )
  if (TTG_HAVE_CUDA)
    list(APPEND ut_src cuda_kernel.cu)
  endif()
  list(APPEND ut_libs std::coroutine)
endif()

add_ttg_executable(core-unittests-ttg "${ut_src}" LINK_LIBRARIES "${ut_libs}" COMPILE_DEFINITIONS "CATCH_CONFIG_NO_POSIX_SIGNALS=1" )

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/serialization.cc
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ TEST_CASE("MADNESS Serialization", "[serialization]") {

T g_obj;
void* g = (void*)&g_obj;
CHECK_NOTHROW(d->unpack_payload(g, obj_size, 0, buf.get()));
CHECK(d->unpack_payload(g, obj_size, 0, buf.get()) == pos);
};

test(99);
Expand Down Expand Up @@ -755,7 +755,7 @@ TEST_CASE("TTG Serialization", "[serialization]") {

T g_obj;
void* g = (void*)&g_obj;
CHECK_NOTHROW(d->unpack_payload(g, obj_size, 0, buf.get()));
CHECK(d->unpack_payload(g, obj_size, 0, buf.get()) == pos);
};

test(99);
Expand Down
2 changes: 1 addition & 1 deletion ttg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ if (TTG_ENABLE_TRACE)
endif (TTG_ENABLE_TRACE)
if (TARGET std::coroutine)
list(APPEND ttg-deps std::coroutine)
list(APPEND ttg-defs "TTG_HAS_COROUTINE=1")
list(APPEND ttg-util-headers
${CMAKE_CURRENT_SOURCE_DIR}/ttg/coroutine.h
)
Expand Down Expand Up @@ -209,6 +208,7 @@ endif(TARGET Boost::serialization)
if (TARGET MADworld)
set(ttg-mad-headers
${CMAKE_CURRENT_SOURCE_DIR}/ttg/madness/buffer.h
${CMAKE_CURRENT_SOURCE_DIR}/ttg/madness/device.h
${CMAKE_CURRENT_SOURCE_DIR}/ttg/madness/fwd.h
${CMAKE_CURRENT_SOURCE_DIR}/ttg/madness/import.h
${CMAKE_CURRENT_SOURCE_DIR}/ttg/madness/ttg.h
Expand Down
3 changes: 3 additions & 0 deletions ttg/ttg/config.in.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
/** the C++ namespace containing the coroutine API */
#define TTG_CXX_COROUTINE_NAMESPACE @CXX_COROUTINE_NAMESPACE@

/** whether the compiler supports C++ coroutines */
#cmakedefine TTG_HAVE_COROUTINE

/** whether TTG has CUDA language support */
#cmakedefine TTG_HAVE_CUDA

Expand Down
5 changes: 5 additions & 0 deletions ttg/ttg/coroutine.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
#define TTG_COROUTINE_H

#include "ttg/config.h"

#ifdef TTG_HAVE_COROUTINE
#include TTG_CXX_COROUTINE_HEADER

#include <algorithm>
#include <array>


namespace ttg {

// import std coroutine API into ttg namespace
Expand Down Expand Up @@ -227,4 +230,6 @@ namespace ttg {

} // namespace ttg

#endif // TTG_HAVE_COROUTINE

#endif // TTG_COROUTINE_H
8 changes: 8 additions & 0 deletions ttg/ttg/device/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include "ttg/config.h"
#include "ttg/execution.h"
#include "ttg/impl_selector.h"
#include "ttg/fwd.h"



Expand Down Expand Up @@ -180,3 +182,9 @@ namespace ttg::device {
}
} // namespace ttg
#endif // defined(TTG_HAVE_HIP)

namespace ttg::device {
  /// Forwards to `num_devices()` in the namespace named by TTG_IMPL_NS
  /// (defined elsewhere; presumably the selected runtime backend) and
  /// returns the result as an int.
  inline auto num_devices() -> int {
    const int count = TTG_IMPL_NS::num_devices();
    return count;
  }
}  // namespace ttg::device
Loading

0 comments on commit be7ff9b

Please sign in to comment.