From 21761f93075a7aa26c314ae675b92df07c5f5892 Mon Sep 17 00:00:00 2001 From: Jamie Bray Date: Sun, 3 Oct 2021 23:29:52 +0800 Subject: [PATCH] 12ms -> 7ms. Enable full optimisation mode in gcc --- makefile | 6 ++--- results/benchmark.txt | 4 ++-- src/board/generate.cpp | 4 ---- src/board/next.cpp | 48 ++++++++++++++++++++-------------------- src/entrypoints/main.cpp | 32 +++++++++++++++++---------- 5 files changed, 49 insertions(+), 45 deletions(-) diff --git a/makefile b/makefile index 390ab5e..f6f0b53 100644 --- a/makefile +++ b/makefile @@ -1,8 +1,8 @@ CC = g++ -COMPILER_FLAGS = -Wall -std=c++2a -lpthread -COMPILER_FLAGS_PROFILE = $(COMPILER_FLAGS) -pg -COMPILER_FLAGS_DEBUG = $(COMPILER_FLAGS) -ggdb +COMPILER_FLAGS = -Wall -W -pedantic -Werror -std=c++2a -lpthread -Ofast +COMPILER_FLAGS_PROFILE = $(COMPILER_FLAGS) -O0 -pg +COMPILER_FLAGS_DEBUG = $(COMPILER_FLAGS) -O0 -ggdb LINKER_FLAGS_BENCHMARK = -isystem benchmark/include -Lbenchmark/build/src -lbenchmark LINKER_FLAGS_GRAPHICS = -lSDL2 diff --git a/results/benchmark.txt b/results/benchmark.txt index fce41ac..c340ce7 100644 --- a/results/benchmark.txt +++ b/results/benchmark.txt @@ -1,5 +1,5 @@ -------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------- -BM_NextBoard/process_time/real_time 12.2 ms 45.2 ms 54 -BM_RenderBoard/process_time/real_time 7.13 ms 7.04 ms 90 +BM_NextBoard/process_time/real_time 7.08 ms 24.7 ms 94 +BM_RenderBoard/process_time/real_time 7.13 ms 7.11 ms 88 diff --git a/src/board/generate.cpp b/src/board/generate.cpp index 69a00be..94c769c 100644 --- a/src/board/generate.cpp +++ b/src/board/generate.cpp @@ -20,10 +20,6 @@ Board randomBoard(int width, int height) { * - bottom: breeder */ Board benchmarkBoard(int width, int height) { - if (height < BREEDER_HEIGHT * 2) - throw std::underflow_error( - "Did not meet minimum height required for the benchmark board"); - srand(0); auto board = new Cell[width * height]; for (int y = 0; y < height; ++y) { diff --git a/src/board/next.cpp b/src/board/next.cpp index b09ed28..6878115 100644 --- a/src/board/next.cpp +++ b/src/board/next.cpp @@ -8,14 +8,20 @@ #include #include +int THREAD_COUNT = + std::max(std::thread::hardware_concurrency(), (unsigned int)1); + +int getThreads() { return THREAD_COUNT; } +void setThreads(int n) { THREAD_COUNT = std::max(n, 1); } + void nextBoardSection(const int startY, const int endY, const Board &board, - Cell *output) { + Cell *output) { const auto &[input, width, height] = board; - int neighbours[3]; - int yAboveBase; - int yBelowBase; - int yBase; + int neighbours[3] = {0,0,0}; + int yAboveBase = 0; + int yBelowBase = 0; + int yBase = 0; const auto endI = endY * width; for (int i = startY * width; i < endI; i++) { @@ -70,36 +76,30 @@ void nextBoardSection(const int startY, const int endY, const Board &board, } } -int THREAD_COUNT = - std::max(std::thread::hardware_concurrency(), (unsigned int)1); - -int getThreads() { return THREAD_COUNT; } -void setThreads(int n) { THREAD_COUNT = std::max(n, 1); } - Board nextBoard(const Board &board) { const auto &[input, width, height] = board; auto output = new Cell[width * height]; - auto threads = std::min(getThreads(), height); - auto split = height / threads; - auto remainder = height % threads; + auto totalThreads = std::min(getThreads(), height); + auto threadLines = height / totalThreads; + auto threadLinesRemaining = height % totalThreads; - std::thread nextBoardSegments[threads]; - for (int thread = 0; thread < threads; thread++) { + std::vector threads; + for (int t = 0; t < totalThreads; t++) { // Compute start and end indexes for threads - const auto startY = thread * split; - auto endY = (thread + 1) * split; + const auto startY = t * threadLines; + auto endY = (t + 1) * threadLines; // In the case of an uneven divide, the last thread gets the left-overs - if (thread == threads - 1) - endY += remainder; + if (t == totalThreads - 1) + endY += threadLinesRemaining; - nextBoardSegments[thread] = - std::thread(&nextBoardSection, startY, endY, board, output); + threads.push_back( + std::thread(&nextBoardSection, startY, endY, board, output)); } - for (int i = 0; i < threads; i++) { - nextBoardSegments[i].join(); + for (auto &thread : threads) { + thread.join(); } free(input); diff --git a/src/entrypoints/main.cpp b/src/entrypoints/main.cpp index cef02cc..6e3c0c0 100644 --- a/src/entrypoints/main.cpp +++ b/src/entrypoints/main.cpp @@ -24,10 +24,11 @@ int main() { SDL_Event event; // Create window - SDL_Renderer *renderer; - SDL_Window *window; - SDL_CreateWindowAndRenderer(2560, 1440, SDL_WINDOW_RESIZABLE, &window, - &renderer); + SDL_Window *window = SDL_CreateWindow( + "Game of Speed", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 2560, + 1440, SDL_WINDOW_RESIZABLE); + SDL_Renderer *renderer = + SDL_CreateRenderer(window, -1, SDL_RENDERER_ACCELERATED); // Window texture int width, height; @@ -40,8 +41,10 @@ int main() { bool running = true; bool recreateBoard = false; while (running) { - /* auto loopTimer = startProfiling(); */ + auto loopTimer = startProfiling(); + auto sdlTimer = startProfiling(); + // Start computing next board #ifdef ENABLE_THREADING std::promise nextBoardPromise; auto nextBoardFuture = nextBoardPromise.get_future(); @@ -58,28 +61,33 @@ int main() { running = false; // Re-create board when Enter is pressed, or window is resized else if ((event.type == SDL_KEYDOWN && - event.key.keysym.scancode == SDL_SCANCODE_RETURN) || (event.type == SDL_WINDOWEVENT && - event.window.event == SDL_WINDOWEVENT_RESIZED)) { + event.key.keysym.scancode == SDL_SCANCODE_RETURN) || + (event.type == SDL_WINDOWEVENT && + event.window.event == SDL_WINDOWEVENT_RESIZED)) { recreateBoard = true; } else if (event.type == SDL_KEYDOWN && - event.key.keysym.scancode == SDL_SCANCODE_J) { - setThreads(getThreads()-1); + event.key.keysym.scancode == SDL_SCANCODE_J) { + setThreads(getThreads() - 1); std::cout << "Setting thread count: " << getThreads() << std::endl; } else if (event.type == SDL_KEYDOWN && - event.key.keysym.scancode == SDL_SCANCODE_K) { - setThreads(getThreads()+1); + event.key.keysym.scancode == SDL_SCANCODE_K) { + setThreads(getThreads() + 1); std::cout << "Setting thread count: " << getThreads() << std::endl; } } renderBoardSdl(board, renderer, texture); + stopProfiling(sdlTimer, " sdl"); + // Wait for the board computation thread to complete + auto joiningTimer = startProfiling(); #ifdef ENABLE_THREADING nextBoardThread.join(); board = nextBoardFuture.get(); #else board = nextBoard(board); #endif + stopProfiling(joiningTimer, " nextBoard.join"); // Re-create board when computation is complete if (recreateBoard) { @@ -93,7 +101,7 @@ int main() { std::cout << "Re-created board: " << width << "x" << height << std::endl; } - /* stopProfiling(loopTimer, "Done loop"); */ + stopProfiling(loopTimer, "main"); } free(get<0>(board));