Skip to content

Commit

Permalink
Benchmarks - work in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
PeterTh committed Dec 9, 2021
1 parent da1c240 commit 3d57601
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "vendor/Catch2"]
path = vendor/Catch2
url = https://github.com/catchorg/Catch2
[submodule "vendor/args"]
path = vendor/args
url = https://github.com/Taywee/args.git
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ endmacro()
add_submodule_directory(vendor/spdlog)
add_submodule_directory(vendor/Catch2)

# we do not want to build the args example or tests
set(ARGS_BUILD_EXAMPLE OFF CACHE INTERNAL "")
set(ARGS_BUILD_UNITTESTS OFF CACHE INTERNAL "")
add_submodule_directory(vendor/args)

# Add includes to library so they show up in IDEs
file(GLOB_RECURSE INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h)

Expand Down Expand Up @@ -226,6 +231,13 @@ if(CELERITY_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()

# Benchmarks

option(CELERITY_BUILD_BENCHMARKS "Build benchmark applications" ON)
if(CELERITY_BUILD_BENCHMARKS)
add_subdirectory(benchmarks)
endif()

# Tests

enable_testing(true)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ Building can be as simple as calling `cmake && make`, depending on your setup
you might however also have to provide some library paths etc.
See our [installation guide](docs/installation.md) for more information.

The runtime comes with several [examples](examples) that are built
automatically when the `CELERITY_BUILD_EXAMPLES` CMake option is set (true by
default).
The runtime comes with [examples](examples) and [benchmarks](benchmarks) that are built
automatically when the `CELERITY_BUILD_EXAMPLES` and/or `CELERITY_BUILD_BENCHMARKS` CMake
options are set (both are true by default).

## Using Celerity as a Library

Expand Down
39 changes: 39 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
cmake_minimum_required(VERSION 3.13)

# Remember whether a parent project pulled us in via add_subdirectory(). This must be checked
# before our own project() call below, because that call defines PROJECT_NAME as well.
if(DEFINED PROJECT_NAME)
	set(INCLUDED_AS_SUBPROJECT ON)
else()
	set(INCLUDED_AS_SUBPROJECT OFF)
endif()

# The benchmarks carry the version number stored in the VERSION file one directory up.
file(STRINGS "../VERSION" Celerity_VERSION)
project(celerity_benchmarks VERSION "${Celerity_VERSION}" LANGUAGES CXX)

# A standalone build has to locate Celerity itself. REQUIRED makes a missing package fail right
# here with a clear message instead of later with an unknown add_celerity_to_target() command.
if(NOT INCLUDED_AS_SUBPROJECT)
	find_package(Celerity "${Celerity_VERSION}" REQUIRED)
endif()

# Defines the benchmark executable NAME, built from the single source file NAME.cc located in
# this directory, and wires it up with Celerity and the `args` command line parsing library.
function(add_benchmark NAME)
	set(BENCHMARK_SOURCE "${NAME}.cc")

	add_executable("${NAME}" "${BENCHMARK_SOURCE}")

	set_target_properties("${NAME}" PROPERTIES
		CXX_STANDARD 17
		FOLDER "benchmarks"
	)

	add_celerity_to_target(
		TARGET "${NAME}"
		SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/${BENCHMARK_SOURCE}"
	)

	target_link_libraries("${NAME}" PUBLIC args)

	# Compiler-specific warning setup
	if(MSVC)
		target_compile_options("${NAME}" PRIVATE /D_CRT_SECURE_NO_WARNINGS /MP /W3)
	elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang|AppleClang")
		target_compile_options("${NAME}" PRIVATE -Wall -Wextra -Wno-unused-parameter)
	endif()
endfunction()

# List of benchmark executables; each name needs a matching <name>.cc file in this directory.
add_benchmark(task_microbenchmark)
130 changes: 130 additions & 0 deletions benchmarks/task_microbenchmark.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Microbenchmark that generates tasks in one of several topologies (soup, chain, map or reduce)

#include <args.hxx>
#include <celerity.h>

/// Shape of the task graph generated by the benchmark.
enum class Topology { Soup, Chain, Map, Reduce };

/// Human-readable names of the Topology enumerators, indexed by their underlying value.
constexpr const char* topologyNames[] = {"Soup", "Chain", "Map", "Reduce"};

// The lookup in getTopologyName() indexes the table with the enumerator value, so the table must
// have exactly one entry per enumerator. Fail the build if the two ever drift apart.
static_assert(static_cast<int>(sizeof(topologyNames) / sizeof(topologyNames[0])) == static_cast<int>(Topology::Reduce) + 1,
    "topologyNames must contain exactly one entry per Topology enumerator");

/// Returns the display name of topology `t`.
inline const char* getTopologyName(Topology t) {
	return topologyNames[static_cast<int>(t)];
}

/// Returns the smallest buffer size that `topology` accepts when generating `numTasks` tasks.
///
/// Soup has no minimum (0), Chain needs at least 1, and Map / Reduce need half the number of
/// tasks, rounded up.
int getMinBufferSizeForTopology(Topology topology, int numTasks) {
	// Deliberately no `default:` label, so that compilers can still warn about enumerators
	// that are added to Topology later but not handled here.
	switch(topology) {
	case Topology::Soup: return 0;
	case Topology::Chain: return 1;
	case Topology::Map:
	case Topology::Reduce: return (numTasks + 1) / 2;
	}
	// Only reachable with a value outside the enumerator list. Without this return, control
	// would flow off the end of a non-void function, which is undefined behavior.
	return 0;
}

/// Benchmark settings; the initializers are the values in effect before any command line
/// argument has been applied.
struct Args {
	int numTasks{1000};                // number of tasks to generate
	int bufferSize{2048};              // size of the buffers used to connect tasks
	Topology topology{Topology::Soup}; // shape of the generated task graph
};

/// Parses the command line into an Args value.
///
/// This function terminates the process instead of returning in the following cases:
///  - help was requested: prints the usage text and exits with status 0
///  - the command line could not be parsed: prints the error plus usage text and exits with status 1
///  - the parsed values are unusable (non-positive task count, negative buffer size, or a buffer
///    too small for the selected topology): prints a message and exits with status 2
///
/// @param argc argument count as passed to main()
/// @param argv argument vector as passed to main()
/// @return the parsed settings
Args parseArgs(int argc, char** argv) {
	Args ret;

	args::ArgumentParser parser("Celerity Task and Command Microbenchmarks");
	args::HelpFlag help(parser, "help", "Display this help menu", {'h', "help"});
	args::Group commands(parser, "commands");
	args::Command chain(commands, "chain", "benchmark a chain of connected tasks");
	args::Command soup(commands, "soup", "benchmark a soup of unconnected tasks");
	args::Command map(commands, "map", "benchmark tasks branching out from a single root");
	args::Command reduce(commands, "reduce", "benchmark tasks converging towards a single result");
	args::Group arguments(parser, "arguments", args::Group::Validators::DontCare, args::Options::Global);
	// The flag defaults come from the Args defaults so that the two cannot diverge.
	args::ValueFlag<int> numTasks(arguments, "num-tasks", "The number of tasks to generate", {'n'}, ret.numTasks);
	args::ValueFlag<int> bufferSize(arguments, "buffer-size", "Size of buffers used to establish task connections", {'b'}, ret.bufferSize);
	try {
		parser.ParseCLI(argc, argv);
	} catch(const args::Help&) {
		std::cout << parser;
		exit(0);
	} catch(const args::Error& e) {
		std::cerr << e.what() << std::endl;
		std::cerr << parser;
		exit(1);
	}

	ret.numTasks = numTasks.Get();
	ret.bufferSize = bufferSize.Get();
	// Soup is the default in Args, so only the other commands need to override the topology.
	if(chain) ret.topology = Topology::Chain;
	if(map) ret.topology = Topology::Map;
	if(reduce) ret.topology = Topology::Reduce;

	// Reject values that cannot describe a benchmark run before they are used as container sizes
	// or loop bounds: a negative size would wrap around to a huge unsigned value.
	if(ret.numTasks < 1 || ret.bufferSize < 0) {
		std::cerr << "The number of tasks must be at least 1 and the buffer size must not be negative, but " << ret.numTasks
		          << " tasks and a buffer size of " << ret.bufferSize << " were requested." << std::endl;
		exit(2);
	}

	auto requiredSize = getMinBufferSizeForTopology(ret.topology, ret.numTasks);
	if(ret.bufferSize < requiredSize) {
		std::cerr << "Topology '" << getTopologyName(ret.topology) << "' requires a buffer size of at least " << requiredSize << " for " << ret.numTasks
		          << " tasks, but buffer size is set to " << ret.bufferSize << "." << std::endl;
		exit(2);
	}

	return ret;
}

/// Submits the tasks of the requested topology to a celerity::distr_queue and afterwards checks
/// that the total number of generated tasks is in the expected range.
///
/// @return 0 on success, 1 if the generated task count is outside the expected range
int main(int argc, char** argv) {
	Args args = parseArgs(argc, argv);

	celerity::distr_queue queue;
	std::vector<float> host_data(args.bufferSize);
	celerity::buffer<float, 1> buffer(host_data.data(), args.bufferSize);

	if(args.topology == Topology::Soup || args.topology == Topology::Chain) {
		for(int t = 0; t < args.numTasks; ++t) {
			queue.submit([=](celerity::handler& cgh) {
				if(args.topology == Topology::Chain) {
					// every task reads and writes the same buffer
					celerity::accessor acc{buffer, cgh, celerity::access::one_to_one(), celerity::read_write};
					cgh.parallel_for<class ChainKernel>(celerity::range<1>(args.bufferSize), [=](celerity::item<1> item) { acc[item]++; });
				} else {
					// soup tasks do not access any buffer
					cgh.parallel_for<class SoupKernel>(celerity::range<1>(args.bufferSize), [=](celerity::item<1> item) {});
				}
			});
		}
	} else if(args.topology == Topology::Map || args.topology == Topology::Reduce) {
		celerity::buffer<float, 1> buffer2(host_data.data(), args.bufferSize);

		// floor(log2(numTasks)), computed with integers: this is well defined for every input
		// (0 epochs for numTasks <= 1) and needs no floating point conversion.
		int numEpochs = 0;
		for(int remaining = args.numTasks; remaining > 1; remaining /= 2) {
			++numEpochs;
		}

		int curEpochTasks = args.topology == Topology::Map ? 1 : 1 << numEpochs;
		int sentinelEpoch = args.topology == Topology::Map ? numEpochs - 1 : 0;
		int sentinelEpochMax = args.numTasks - (curEpochTasks - 1); // how many tasks to generate at the last/first epoch to reach exactly args.numTasks
		for(int e = 0; e < numEpochs; ++e) {
			int taskCount = curEpochTasks;
			if(e == sentinelEpoch) taskCount = sentinelEpochMax;

			// build tasks for this epoch
			for(int t = 0; t < taskCount; ++t) {
				queue.submit([=](celerity::handler& cgh) {
					// mappers constructed to build a binary (potentially inverted) tree
					auto read_mapper = [=](const celerity::chunk<1>& chunk) {
						return args.topology == Topology::Map ? celerity::subrange<1>(t / 2, 1) : celerity::subrange<1>(t * 2, 2);
					};
					auto write_mapper = [=](const celerity::chunk<1>& chunk) { return celerity::subrange<1>(t, 1); };
					celerity::accessor write_acc{buffer, cgh, write_mapper, celerity::write_only};
					celerity::accessor read_acc{buffer2, cgh, read_mapper, celerity::read_only};
					cgh.parallel_for<class TreeKernel>(celerity::range<1>(1), [=](celerity::item<1> item) { write_acc[item] = read_acc[item]; });
				});
			}

			// get ready for the next epoch
			if(args.topology == Topology::Map) {
				curEpochTasks *= 2;
			} else {
				curEpochTasks /= 2;
			}
			// exchange the roles of the two buffers, so that the next epoch reads from the buffer
			// this epoch has written to
			std::swap(buffer, buffer2);
		}
	}

	{ // basic verification
		// check that there are more than the requested number of tasks generated, but less than 2x the requested number
		// (it will be more due to the initialization task and horizon tasks)
		int totalTaskCount = celerity::detail::runtime::get_instance().get_task_manager().get_total_task_count();
		if(totalTaskCount < args.numTasks || totalTaskCount > args.numTasks * 2) {
			std::cerr << "Error: asked to generate " << args.numTasks << " tasks, but generated " << totalTaskCount << "." << std::endl;
			// report the failed check to the caller instead of exiting as if the run had succeeded
			return 1;
		}
	}

	return 0;
}
1 change: 1 addition & 0 deletions vendor/args
Submodule args added at a48e1f

0 comments on commit 3d57601

Please sign in to comment.