From 2f68783cc8a3d9bd7590e617b71a5a0df010af56 Mon Sep 17 00:00:00 2001 From: Jamie Bray Date: Sun, 3 Oct 2021 21:58:09 +0800 Subject: [PATCH] 50ms -> 15ms. Laziest solution to the computation speed... MORE THREADS --- makefile | 7 +++-- results/benchmark.txt | 10 +++---- src/board/next.cpp | 46 +++++++++++++++++++++++++++++---- src/board/next.h | 4 +++ src/entrypoints/benchmark.cpp | 5 ++-- src/entrypoints/interactive.cpp | 8 ++++++ 6 files changed, 65 insertions(+), 15 deletions(-) diff --git a/makefile b/makefile index 8411df2..cacdba4 100644 --- a/makefile +++ b/makefile @@ -1,10 +1,10 @@ CC = g++ COMPILER_FLAGS = -Wall -std=c++2a -lpthread -COMPILER_FLAGS_BENCHMARK = $(COMPILER_FLAGS) -isystem benchmark/include -Lbenchmark/build/src -lbenchmark COMPILER_FLAGS_PROFILE = $(COMPILER_FLAGS) -pg COMPILER_FLAGS_DEBUG = $(COMPILER_FLAGS) -ggdb +LINKER_FLAGS_BENCHMARK = -isystem benchmark/include -Lbenchmark/build/src -lbenchmark LINKER_FLAGS_GRAPHICS = -lSDL2 OUTPUT = build/out @@ -26,8 +26,11 @@ test: build $(CC) src/entrypoints/test.cpp $(COMPILER_FLAGS) -ggdb -o $(OUTPUT) $(OBJS) ./$(OUTPUT) +benchmark_out: build + $(CC) src/entrypoints/benchmark.cpp $(COMPILER_FLAGS) $(LINKER_FLAGS_BENCHMARK) $(LINKER_FLAGS_GRAPHICS) -o $(OUTPUT) $(OBJS_GRAPHICS) + benchmark: build - $(CC) src/entrypoints/benchmark.cpp $(COMPILER_FLAGS_BENCHMARK) $(LINKER_FLAGS_GRAPHICS) -o $(OUTPUT) $(OBJS_GRAPHICS) + make benchmark_out ./$(OUTPUT) > results/benchmark.txt build: diff --git a/results/benchmark.txt b/results/benchmark.txt index 1c3d0f4..3704d93 100644 --- a/results/benchmark.txt +++ b/results/benchmark.txt @@ -1,5 +1,5 @@ ------------------------------------------------------------------------- -Benchmark Time CPU Iterations ------------------------------------------------------------------------- -BM_NextBoard/min_time:5.000 50.0 ms 49.9 ms 126 -BM_RenderBoard/min_time:5.000 6.96 ms 6.95 ms 1006 +-------------------------------------------------------------------------------- +Benchmark Time CPU Iterations +-------------------------------------------------------------------------------- +BM_NextBoard/process_time/real_time 15.4 ms 51.3 ms 42 +BM_RenderBoard/process_time/real_time 7.04 ms 6.99 ms 99 diff --git a/src/board/next.cpp b/src/board/next.cpp index 9dda3a9..3e724d3 100644 --- a/src/board/next.cpp +++ b/src/board/next.cpp @@ -2,20 +2,23 @@ #include "../util/profile.h" #include #include +#include #include +#include #include #include -Board nextBoard(const Board &board) { +void nextBoardSection(const int startY, const int endY, const Board &board, + Cell *output) { const auto &[input, width, height] = board; - auto output = new Cell[width * height]; int neighbours[3]; int yAboveBase; int yBelowBase; int yBase; - for (int i = 0; i < width * height; i++) { + const auto endI = endY * width; + for (int i = startY * width; i < endI; i++) { const int x = i % width; auto currentStateBool = input[i] == ALIVE ? 1 : 0; @@ -45,8 +48,8 @@ Board nextBoard(const Board &board) { (input[yAboveBase + previousX] ? 1 : 0); } if (neighbours[1] == -1) { - neighbours[1] = (input[yBelowBase + x] ? 1 : 0) + - (input[yAboveBase + x] ? 1 : 0); + neighbours[1] = + (input[yBelowBase + x] ? 1 : 0) + (input[yAboveBase + x] ? 1 : 0); } const auto nextX = (x + 1) % width; neighbours[2] = (input[yBelowBase + nextX] ? 1 : 0) + @@ -65,6 +68,39 @@ Board nextBoard(const Board &board) { // Add self to neighbours count neighbours[1] += currentStateBool; } +} + +int THREAD_COUNT = + std::max(std::thread::hardware_concurrency(), (unsigned int)1); + +int getThreads() { return THREAD_COUNT; } +void setThreads(int n) { THREAD_COUNT = std::max(n, 1); } + +Board nextBoard(const Board &board) { + const auto &[input, width, height] = board; + auto output = new Cell[width * height]; + const auto threads = getThreads(); + + const auto split = height / std::min(height, (int)threads); + const auto remainder = height % threads; + + std::thread nextBoardSegments[threads]; + for (int thread = 0; thread < threads; thread++) { + // Compute start and end indexes for threads + const auto startY = thread * split; + auto endY = (thread + 1) * split; + + // In the case of an uneven divide, the last thread gets the left-overs + if (thread == threads - 1) + endY += remainder; + + nextBoardSegments[thread] = + std::thread(&nextBoardSection, startY, endY, board, output); + } + + for (int i = 0; i < threads; i++) { + nextBoardSegments[i].join(); + } free(input); diff --git a/src/board/next.h b/src/board/next.h index bd37a3f..5248ae2 100644 --- a/src/board/next.h +++ b/src/board/next.h @@ -1,5 +1,9 @@ #pragma once #include "board.h" +#include Board nextBoard(const Board &input); + +int getThreads(); +void setThreads(int n); diff --git a/src/entrypoints/benchmark.cpp b/src/entrypoints/benchmark.cpp index 5e98350..8cb3d3c 100644 --- a/src/entrypoints/benchmark.cpp +++ b/src/entrypoints/benchmark.cpp @@ -16,6 +16,7 @@ static void BM_NextBoard(benchmark::State &state) { free(get<0>(board)); } +BENCHMARK(BM_NextBoard)->Unit(benchmark::kMillisecond)->MeasureProcessCPUTime()->UseRealTime(); static void BM_RenderBoard(benchmark::State &state) { auto board = benchmarkBoard(TEST_WIDTH, TEST_HEIGHT); @@ -39,8 +40,6 @@ static void BM_RenderBoard(benchmark::State &state) { free(get<0>(board)); } - -BENCHMARK(BM_NextBoard)->Unit(benchmark::kMillisecond)->MinTime(5); -BENCHMARK(BM_RenderBoard)->Unit(benchmark::kMillisecond)->MinTime(5); +BENCHMARK(BM_RenderBoard)->Unit(benchmark::kMillisecond)->MeasureProcessCPUTime()->UseRealTime(); BENCHMARK_MAIN(); diff --git a/src/entrypoints/interactive.cpp b/src/entrypoints/interactive.cpp index d17c49d..36b9614 100644 --- a/src/entrypoints/interactive.cpp +++ b/src/entrypoints/interactive.cpp @@ -60,6 +60,14 @@ int main() { else if (event.type == SDL_KEYDOWN && event.key.keysym.scancode == SDL_SCANCODE_RETURN) { recreateBoard = true; + } else if (event.type == SDL_KEYDOWN && + event.key.keysym.scancode == SDL_SCANCODE_J) { + setThreads(getThreads()-1); + std::cout << "Setting thread count: " << getThreads() << std::endl; + } else if (event.type == SDL_KEYDOWN && + event.key.keysym.scancode == SDL_SCANCODE_K) { + setThreads(getThreads()+1); + std::cout << "Setting thread count: " << getThreads() << std::endl; } }