Add a configurable job count to the board computation.

nextBoard() now splits the board into `jobCount` row segments that are pulled
from a shared atomic counter by `threadCount` worker threads, instead of
handing each thread exactly one segment. The new parameter is passed through
Loop::run, renderImguiMenu (new "Jobs/frame" slider), startNextBoardLoopThread,
the benchmarks, the tests and the command line.

Review changes relative to the patch as originally submitted:
  * src/main.cpp: jobCount is read from argv[5]; it previously reused argv[4],
    which is the worker-thread argument.
  * src/logic/next.cpp: the atomic job cursor is named `nextJob` so it is no
    longer shadowed by the `job` loop variable.

NOTE(review): this copy was recovered from a whitespace-collapsed extract.
Indentation, blank context lines, the `<thread>` include target in next.h and
the template arguments in next.cpp (`std::vector<std::thread>`,
`std::vector<std::function<void()>>`, `std::atomic<uint>`) are reconstructed;
confirm them against the repository before applying. Also confirm that
next.cpp includes <atomic> and <functional>. The hashes on the "index" lines
are those of the original commit.

diff --git a/src/benchmark/main.cpp b/src/benchmark/main.cpp
index e25e526..faa6c20 100644
--- a/src/benchmark/main.cpp
+++ b/src/benchmark/main.cpp
@@ -15,7 +15,7 @@ static void BM_NextBoard(benchmark::State& state) {
   setBenchmarkBoard(board);
 
   for (auto _ : state) {
-    nextBoard(board, PROBABLY_OPTIMAL_THREAD_COUNT);
+    nextBoard(board, PROBABLY_OPTIMAL_THREAD_COUNT, PROBABLY_OPTIMAL_JOB_COUNT);
   }
 }
 
@@ -33,7 +33,7 @@ BENCHMARK(BM_RenderBoard)->Unit(benchmark::kMillisecond)->MeasureProcessCPUTime(
 static void BM_Main(benchmark::State& state) {
   Loop loop(TEST_WIDTH, TEST_HEIGHT, TEST_TITLE, TEST_RESIZABLE);
   for (auto _ : state) {
-    loop.run(2000, PROBABLY_OPTIMAL_THREAD_COUNT, 1000000 / 30);
+    loop.run(2000, PROBABLY_OPTIMAL_THREAD_COUNT, PROBABLY_OPTIMAL_JOB_COUNT, 1000000 / 30);
   }
 }
 
diff --git a/src/benchmark/main_short.cpp b/src/benchmark/main_short.cpp
index fe71396..d39d8fd 100644
--- a/src/benchmark/main_short.cpp
+++ b/src/benchmark/main_short.cpp
@@ -7,13 +7,13 @@ static void BM_DifferentThreadCounts(benchmark::State& state) {
   Loop loop(2560, 1440, "Game of Speed [Benchmark]", false);
 
   for (auto _ : state) {
-    loop.run(2000, state.range(0), 1000000 / 30);
+    loop.run(2000, 4, state.range(0), 1000000 / 30);
   }
 }
 
 BENCHMARK(BM_DifferentThreadCounts)
     ->Unit(benchmark::kSecond)
     ->MeasureProcessCPUTime()
-    ->DenseRange(1, PROBABLY_OPTIMAL_THREAD_COUNT * 2, 1);
+    ->DenseRange(2, PROBABLY_OPTIMAL_JOB_COUNT * 10, 2);
 
 BENCHMARK_MAIN();
diff --git a/src/graphics/loop.cpp b/src/graphics/loop.cpp
index 6c60166..0bdd887 100644
--- a/src/graphics/loop.cpp
+++ b/src/graphics/loop.cpp
@@ -29,9 +29,9 @@ Loop::~Loop() {
   delete[] pixels;
 }
 
-void Loop::run(ulong maxGenerations, uint threadCount, ulong renderMinimumMicroseconds) {
+void Loop::run(ulong maxGenerations, uint threadCount, uint jobCount, ulong renderMinimumMicroseconds) {
   ulong computedGenerations = 0;
-  auto nextBoardThread = startNextBoardLoopThread(maxGenerations, threadCount, board, computedGenerations);
+  auto nextBoardThread = startNextBoardLoopThread(maxGenerations, threadCount, jobCount, board, computedGenerations);
 
   sf::Clock clock;
   while (window.isOpen() && computedGenerations < maxGenerations) {
@@ -40,7 +40,7 @@ void Loop::run(ulong maxGenerations, uint threadCount, ulong renderMinimumMicros
     ImGui::SFML::Update(window, delta);
 
     renderBoard(board, window, sprite, texture, pixels);
-    renderImguiMenu(board, window, delta, computedGenerations, threadCount, renderMinimumMicroseconds);
+    renderImguiMenu(board, window, delta, computedGenerations, threadCount, jobCount, renderMinimumMicroseconds);
 
     ImGui::SFML::Render(window);
 
diff --git a/src/graphics/loop.h b/src/graphics/loop.h
index afb2ecb..92f795f 100644
--- a/src/graphics/loop.h
+++ b/src/graphics/loop.h
@@ -6,7 +6,7 @@ struct Loop {
   Loop(const uint width, const uint height, const std::string title, const bool resizable);
   ~Loop();
 
-  void run(const ulong maxComputations, uint threadCount, const ulong renderMinimumMicroseconds);
+  void run(const ulong maxComputations, uint threadCount, uint jobCount, const ulong renderMinimumMicroseconds);
 
   sf::RenderWindow window;
   sf::Texture texture;
diff --git a/src/graphics/renderImguiMenu.cpp b/src/graphics/renderImguiMenu.cpp
index 5a1c26a..dc1df82 100644
--- a/src/graphics/renderImguiMenu.cpp
+++ b/src/graphics/renderImguiMenu.cpp
@@ -11,6 +11,7 @@ void renderImguiMenu(
     const sf::Time& renderDelta,
     const ulong& computedGenerations,
     uint& threadCount,
+    uint& jobCount,
     ulong& renderMinimumMicroseconds) {
   ImGui::Begin("Configuration");
 
@@ -35,12 +36,20 @@ void renderImguiMenu(
       "Computations/second: %.2f", (float)(computedGenerations - lastComputedGenerations) / renderDelta.asSeconds());
   lastComputedGenerations = computedGenerations;
   int tpb = (int)threadCount;
-  ImGui::SliderInt("Threads/computation", &tpb, 1, (int)PROBABLY_OPTIMAL_THREAD_COUNT * 4);
+  ImGui::SliderInt("Threads/frame", &tpb, 1, (int)PROBABLY_OPTIMAL_THREAD_COUNT * 4);
   if ((uint)tpb != threadCount) {
     auto scope = LockForScope(board.lock);
     threadCount = (uint)tpb;
   }
 
+  // Jobs per frame
+  int jobs = (int)jobCount;
+  ImGui::SliderInt("Jobs/frame", &jobs, 1, (int)PROBABLY_OPTIMAL_JOB_COUNT * 10);
+  if ((uint)jobs != jobCount) {
+    auto scope = LockForScope(board.lock);
+    jobCount = (uint)jobs;
+  }
+
   // Reset action
   if (ImGui::Button("Restart")) {
     auto scope = LockForScope(board.lock);
diff --git a/src/graphics/renderImguiMenu.h b/src/graphics/renderImguiMenu.h
index cca89fd..0ab0c6f 100644
--- a/src/graphics/renderImguiMenu.h
+++ b/src/graphics/renderImguiMenu.h
@@ -9,4 +9,5 @@ void renderImguiMenu(
     const sf::Time& renderDelta,
     const ulong& computedGenerations,
     uint& threadCount,
+    uint& jobCount,
     ulong& renderMinimumMicroseconds);
diff --git a/src/logic/next.cpp b/src/logic/next.cpp
index 26351fa..cb303d8 100644
--- a/src/logic/next.cpp
+++ b/src/logic/next.cpp
@@ -59,29 +59,41 @@ void nextBoardSection(
   }
 }
 
-void nextBoard(Board& board, const uint& threadCount) {
+void nextBoard(Board& board, const uint& threadCount, const uint& jobCount) {
   board.setOutputToInput();
 
   std::memset(board.outSkip, true, sizeof(Cell) * board.rawSize);
 
-  const uint segmentSize = (board.height / threadCount + board.height % threadCount) * board.rawWidth;
+  const uint segmentSize = (board.height / jobCount + board.height % jobCount) * board.rawWidth;
 
   uint endI = board.rawWidth + 1;
 
   std::vector<std::thread> threads(threadCount);
-  for (auto& thread : threads) {
+  std::vector<std::function<void()>> jobs(jobCount);
+  std::atomic<uint> nextJob = {0};
+
+  for (auto& job : jobs) {
     const uint beginI = endI;
     endI = std::min(board.rawSize - board.rawWidth, endI + segmentSize);
+    board.inSkip[endI] = false; // Never skip last cell
 
-    thread = std::thread([&, beginI, endI]() {
-      board.inSkip[endI] = false; // Never skip last cell
-
+    job = [&, beginI, endI]() {
       nextBoardSection(beginI, endI, board.rawWidth, board.input, board.output, board.inSkip, board.outSkip);
+    };
+  };
+
+  for (auto& thread : threads) {
+    thread = std::thread([&]() {
+      uint current = nextJob.fetch_add(1);
+      while (current < jobCount) {
+        jobs[current]();
+        current = nextJob.fetch_add(1);
+      }
     });
   };
 
   for (auto& thread : threads) {
-    thread.join();
+    thread.join(); // TODO: experiment with detaching threads & using a "done" flag
   }
 
   assignBoardPadding(board);
@@ -90,13 +102,14 @@ void nextBoard(Board& board, const uint& threadCount) {
 std::thread startNextBoardLoopThread(
     const ulong& maxGenerations,
     const uint& threadCount,
+    const uint& jobCount,
     Board& board,
     ulong& computedGenerations) {
   return std::thread{[&]() {
     while (computedGenerations < maxGenerations) {
       board.lock.pauseIfRequested();
 
-      nextBoard(board, threadCount);
+      nextBoard(board, threadCount, jobCount);
       ++computedGenerations;
     }
   }};
diff --git a/src/logic/next.h b/src/logic/next.h
index ef6ebe5..468f4fb 100644
--- a/src/logic/next.h
+++ b/src/logic/next.h
@@ -3,10 +3,11 @@
 #include <thread>
 #include "board.h"
 
-void nextBoard(Board& board, const uint& threadCount);
+void nextBoard(Board& board, const uint& threadCount, const uint& jobCount);
 
 std::thread startNextBoardLoopThread(
     const ulong& maxGenerations,
     const uint& threadCount,
+    const uint& jobCount,
     Board& board,
     ulong& computedGenerations);
diff --git a/src/logic/threads.h b/src/logic/threads.h
index f84872d..9d7e24a 100644
--- a/src/logic/threads.h
+++ b/src/logic/threads.h
@@ -4,3 +4,4 @@
 
 // The number of threads that was optimal in the benchmarks
 const uint PROBABLY_OPTIMAL_THREAD_COUNT = std::max(std::thread::hardware_concurrency(), (unsigned int)1);
+const uint PROBABLY_OPTIMAL_JOB_COUNT = 16;
diff --git a/src/main.cpp b/src/main.cpp
index 8813e69..326a6b9 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -6,8 +6,9 @@ int main(int argc, char** argv) {
   const uint maxGenerations = argc > 2 ? (uint)atoi(argv[2]) : UINT32_MAX;
   const ulong rendersPerSecond = argc > 3 ? (uint)atoi(argv[3]) : 30;
   const uint workerThreads = argc > 4 ? (uint)atoi(argv[4]) : PROBABLY_OPTIMAL_THREAD_COUNT;
+  const uint jobCount = argc > 5 ? (uint)atoi(argv[5]) : PROBABLY_OPTIMAL_JOB_COUNT;
 
-  Loop(2560, 1440, "Game of Speed", resizable).run(maxGenerations, workerThreads, 1000000 / rendersPerSecond);
+  Loop(2560, 1440, "Game of Speed", resizable).run(maxGenerations, workerThreads, jobCount, 1000000 / rendersPerSecond);
 
   return EXIT_SUCCESS;
 }
diff --git a/src/test/main.cpp b/src/test/main.cpp
index 35baa83..9c586c3 100644
--- a/src/test/main.cpp
+++ b/src/test/main.cpp
@@ -39,7 +39,7 @@ void compare(BoardVector a, BoardVector b, uint generations = 1) {
   generate(boardA, a);
   generate(boardB, b);
   for (uint i = 0; i < generations; i++)
-    nextBoard(boardA, PROBABLY_OPTIMAL_THREAD_COUNT);
+    nextBoard(boardA, PROBABLY_OPTIMAL_THREAD_COUNT, PROBABLY_OPTIMAL_JOB_COUNT);
   REQUIRE(ungenerate(boardA) == ungenerate(boardB));
 }
 