Skip to content

Commit

Permalink
1.77s -> 1.50s. Create a job pool so that faster threads aren't idle …
Browse files Browse the repository at this point in the history
…as much.
  • Loading branch information
Jumbub committed Mar 7, 2022
1 parent c3ab29a commit d350d8b
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 20 deletions.
4 changes: 2 additions & 2 deletions src/benchmark/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ static void BM_NextBoard(benchmark::State& state) {
setBenchmarkBoard(board);

for (auto _ : state) {
nextBoard(board, PROBABLY_OPTIMAL_THREAD_COUNT);
nextBoard(board, PROBABLY_OPTIMAL_THREAD_COUNT, PROBABLY_OPTIMAL_JOB_COUNT);
}
}

Expand All @@ -33,7 +33,7 @@ BENCHMARK(BM_RenderBoard)->Unit(benchmark::kMillisecond)->MeasureProcessCPUTime(
static void BM_Main(benchmark::State& state) {
Loop loop(TEST_WIDTH, TEST_HEIGHT, TEST_TITLE, TEST_RESIZABLE);
for (auto _ : state) {
loop.run(2000, PROBABLY_OPTIMAL_THREAD_COUNT, 1000000 / 30);
loop.run(2000, PROBABLY_OPTIMAL_THREAD_COUNT, PROBABLY_OPTIMAL_JOB_COUNT, 1000000 / 30);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/benchmark/main_short.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
static void BM_DifferentThreadCounts(benchmark::State& state) {
Loop loop(2560, 1440, "Game of Speed [Benchmark]", false);
for (auto _ : state) {
loop.run(2000, state.range(0), 1000000 / 30);
loop.run(2000, 4, state.range(0), 1000000 / 30);
}
}

BENCHMARK(BM_DifferentThreadCounts)
->Unit(benchmark::kSecond)
->MeasureProcessCPUTime()
->DenseRange(1, PROBABLY_OPTIMAL_THREAD_COUNT * 2, 1);
->DenseRange(2, PROBABLY_OPTIMAL_JOB_COUNT * 10, 2);

BENCHMARK_MAIN();
6 changes: 3 additions & 3 deletions src/graphics/loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ Loop::~Loop() {
delete[] pixels;
}

void Loop::run(ulong maxGenerations, uint threadCount, ulong renderMinimumMicroseconds) {
void Loop::run(ulong maxGenerations, uint threadCount, uint jobCount, ulong renderMinimumMicroseconds) {
ulong computedGenerations = 0;
auto nextBoardThread = startNextBoardLoopThread(maxGenerations, threadCount, board, computedGenerations);
auto nextBoardThread = startNextBoardLoopThread(maxGenerations, threadCount, jobCount, board, computedGenerations);

sf::Clock clock;
while (window.isOpen() && computedGenerations < maxGenerations) {
Expand All @@ -40,7 +40,7 @@ void Loop::run(ulong maxGenerations, uint threadCount, ulong renderMinimumMicros
ImGui::SFML::Update(window, delta);

renderBoard(board, window, sprite, texture, pixels);
renderImguiMenu(board, window, delta, computedGenerations, threadCount, renderMinimumMicroseconds);
renderImguiMenu(board, window, delta, computedGenerations, threadCount, jobCount, renderMinimumMicroseconds);

ImGui::SFML::Render(window);

Expand Down
2 changes: 1 addition & 1 deletion src/graphics/loop.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
struct Loop {
Loop(const uint width, const uint height, const std::string title, const bool resizable);
~Loop();
void run(const ulong maxComputations, uint threadCount, const ulong renderMinimumMicroseconds);
void run(const ulong maxComputations, uint threadCount, uint jobCount, const ulong renderMinimumMicroseconds);

sf::RenderWindow window;
sf::Texture texture;
Expand Down
11 changes: 10 additions & 1 deletion src/graphics/renderImguiMenu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ void renderImguiMenu(
const sf::Time& renderDelta,
const ulong& computedGenerations,
uint& threadCount,
uint& jobCount,
ulong& renderMinimumMicroseconds) {
ImGui::Begin("Configuration");

Expand All @@ -35,12 +36,20 @@ void renderImguiMenu(
"Computations/second: %.2f", (float)(computedGenerations - lastComputedGenerations) / renderDelta.asSeconds());
lastComputedGenerations = computedGenerations;
int tpb = (int)threadCount;
ImGui::SliderInt("Threads/computation", &tpb, 1, (int)PROBABLY_OPTIMAL_THREAD_COUNT * 4);
ImGui::SliderInt("Threads/frame", &tpb, 1, (int)PROBABLY_OPTIMAL_THREAD_COUNT * 4);
if ((uint)tpb != threadCount) {
auto scope = LockForScope(board.lock);
threadCount = (uint)tpb;
}

// Jobs per frame
int jobs = (int)jobCount;
ImGui::SliderInt("Jobs/frame", &jobs, 1, (int)PROBABLY_OPTIMAL_JOB_COUNT * 10);
if ((uint)jobs != jobCount) {
auto scope = LockForScope(board.lock);
jobCount = (uint)jobs;
}

// Reset action
if (ImGui::Button("Restart")) {
auto scope = LockForScope(board.lock);
Expand Down
1 change: 1 addition & 0 deletions src/graphics/renderImguiMenu.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ void renderImguiMenu(
const sf::Time& renderDelta,
const ulong& computedGenerations,
uint& threadCount,
uint& jobCount,
ulong& renderMinimumMicroseconds);
29 changes: 21 additions & 8 deletions src/logic/next.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,29 +59,41 @@ void nextBoardSection(
}
}

void nextBoard(Board& board, const uint& threadCount) {
void nextBoard(Board& board, const uint& threadCount, const uint& jobCount) {
board.setOutputToInput();

std::memset(board.outSkip, true, sizeof(Cell) * board.rawSize);

const uint segmentSize = (board.height / threadCount + board.height % threadCount) * board.rawWidth;
const uint segmentSize = (board.height / jobCount + board.height % jobCount) * board.rawWidth;
uint endI = board.rawWidth + 1;

std::vector<std::thread> threads(threadCount);

for (auto& thread : threads) {
std::vector<std::function<void()>> jobs(jobCount);
std::atomic<uint> job = {0};

for (auto& job : jobs) {
const uint beginI = endI;
endI = std::min(board.rawSize - board.rawWidth, endI + segmentSize);
board.inSkip[endI] = false; // Never skip last cell

thread = std::thread([&, beginI, endI]() {
board.inSkip[endI] = false; // Never skip last cell

job = [&, beginI, endI]() {
nextBoardSection(beginI, endI, board.rawWidth, board.input, board.output, board.inSkip, board.outSkip);
};
};

for (auto& thread : threads) {
thread = std::thread([&]() {
uint current = job.fetch_add(1);
while (current < jobCount) {
jobs[current]();
current = job.fetch_add(1);
}
});
};

for (auto& thread : threads) {
thread.join();
thread.join(); // TODO: experiment with detaching threads & using a "done" flag
}

assignBoardPadding(board);
Expand All @@ -90,13 +102,14 @@ void nextBoard(Board& board, const uint& threadCount) {
std::thread startNextBoardLoopThread(
const ulong& maxGenerations,
const uint& threadCount,
const uint& jobCount,
Board& board,
ulong& computedGenerations) {
return std::thread{[&]() {
while (computedGenerations < maxGenerations) {
board.lock.pauseIfRequested();

nextBoard(board, threadCount);
nextBoard(board, threadCount, jobCount);
++computedGenerations;
}
}};
Expand Down
3 changes: 2 additions & 1 deletion src/logic/next.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
#include <thread>
#include "board.h"

void nextBoard(Board& board, const uint& threadCount);
// Advances `board` by one generation.
// The board is divided into `jobCount` segments; `threadCount` worker threads are spawned for the
// call, each repeatedly claiming the next unprocessed segment until none remain, and all of them
// are joined before the function returns.
// `jobCount` must be non-zero: it is used as a divisor when sizing the segments.
void nextBoard(Board& board, const uint& threadCount, const uint& jobCount);

// Starts and returns a thread that repeatedly calls nextBoard(board, threadCount, jobCount),
// incrementing `computedGenerations` after each call, until it reaches `maxGenerations`.
// Before every generation the thread calls board.lock.pauseIfRequested().
// All arguments are captured by reference by the thread, so every one of them must outlive it;
// `threadCount` and `jobCount` are re-read on each generation.
std::thread startNextBoardLoopThread(
const ulong& maxGenerations,
const uint& threadCount,
const uint& jobCount,
Board& board,
ulong& computedGenerations);
1 change: 1 addition & 0 deletions src/logic/threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@

// The number of threads that was optimal in the benchmarks
// (hardware_concurrency() may report 0 when it cannot be determined, hence the lower bound of 1)
const uint PROBABLY_OPTIMAL_THREAD_COUNT = std::max(std::thread::hardware_concurrency(), (unsigned int)1);
// The default number of jobs (board segments) that each generation is split into for the worker threads
// NOTE(review): presumably 16 was picked from the benchmark runs — confirm
const uint PROBABLY_OPTIMAL_JOB_COUNT = 16;
3 changes: 2 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ int main(int argc, char** argv) {
const uint maxGenerations = argc > 2 ? (uint)atoi(argv[2]) : UINT32_MAX;
const ulong rendersPerSecond = argc > 3 ? (uint)atoi(argv[3]) : 30;
const uint workerThreads = argc > 4 ? (uint)atoi(argv[4]) : PROBABLY_OPTIMAL_THREAD_COUNT;
// The job count is its own CLI argument (argv[5]); argv[4] is already the worker thread count.
// Missing or non-positive values fall back to the default, because a job count of 0 would be
// used as a divisor when the board is split into segments.
const uint jobCount = argc > 5 && atoi(argv[5]) > 0 ? (uint)atoi(argv[5]) : PROBABLY_OPTIMAL_JOB_COUNT;

Loop(2560, 1440, "Game of Speed", resizable).run(maxGenerations, workerThreads, 1000000 / rendersPerSecond);
Loop(2560, 1440, "Game of Speed", resizable).run(maxGenerations, workerThreads, jobCount, 1000000 / rendersPerSecond);

return EXIT_SUCCESS;
}
2 changes: 1 addition & 1 deletion src/test/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void compare(BoardVector a, BoardVector b, uint generations = 1) {
generate(boardA, a);
generate(boardB, b);
for (uint i = 0; i < generations; i++)
nextBoard(boardA, PROBABLY_OPTIMAL_THREAD_COUNT);
nextBoard(boardA, PROBABLY_OPTIMAL_THREAD_COUNT, PROBABLY_OPTIMAL_JOB_COUNT);
REQUIRE(ungenerate(boardA) == ungenerate(boardB));
}

Expand Down

0 comments on commit d350d8b

Please sign in to comment.