Skip to content

Commit

Permalink
Add multithreaded support in the DWT encoder.
Browse files Browse the repository at this point in the history
Update the bench_dwt utility to have a -decode/-encode switch

Measured performance gains for DWT encoder on a
Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz (4 cores, hyper threaded)

Encoding time:
$ ./bin/bench_dwt -encode -num_threads 1
time for dwt_encode: total = 8.348 s, wallclock = 8.352 s

$ ./bin/bench_dwt -encode -num_threads 2
time for dwt_encode: total = 9.776 s, wallclock = 4.904 s

$ ./bin/bench_dwt -encode -num_threads 4
time for dwt_encode: total = 13.188 s, wallclock = 3.310 s

$ ./bin/bench_dwt -encode -num_threads 8
time for dwt_encode: total = 30.024 s, wallclock = 4.064 s

Scaling is probably limited by memory access patterns, which make
memory access the bottleneck.
The slightly worse results with threads==8 than with threads==4
are due to hyperthreading not being appropriate here.
  • Loading branch information
rouault committed Apr 30, 2020
1 parent d448931 commit 0dfc2d7
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 64 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,9 @@ if(BUILD_JPIP_SERVER)
endif()
add_subdirectory(src/lib)
option(BUILD_LUTS_GENERATOR "Build utility to generate t1_luts.h" OFF)
# Offer the BUILD_UNIT_TESTS cache option (default OFF) only when configuring
# on a UNIX platform; on other platforms the option is not declared at all.
if(UNIX)
option(BUILD_UNIT_TESTS "Build unit tests (bench_dwt, test_sparse_array, etc..)" OFF)
endif()

#-----------------------------------------------------------------------------
# Build Applications
Expand Down
4 changes: 2 additions & 2 deletions src/lib/openjp2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
TARGET_LINK_LIBRARIES(${OPENJPEG_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)

if(BUILD_UNIT_TESTS)
if(BUILD_UNIT_TESTS AND UNIX)
add_executable(bench_dwt bench_dwt.c)
if(UNIX)
target_link_libraries(bench_dwt m ${OPENJPEG_LIBRARY_NAME})
Expand All @@ -215,4 +215,4 @@ if(BUILD_UNIT_TESTS)
if(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
target_link_libraries(test_sparse_array ${CMAKE_THREAD_LIBS_INIT})
endif(OPJ_USE_THREAD AND Threads_FOUND AND CMAKE_USE_PTHREADS_INIT)
endif(BUILD_UNIT_TESTS)
endif(BUILD_UNIT_TESTS AND UNIX)
56 changes: 48 additions & 8 deletions src/lib/openjp2/bench_dwt.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ void init_tilec(opj_tcd_tilecomp_t * l_tilec,
l_tilec->data[i] = getValue((OPJ_UINT32)i);
}
l_tilec->numresolutions = numresolutions;
l_tilec->minimum_num_resolutions = numresolutions;
l_tilec->resolutions = (opj_tcd_resolution_t*) opj_calloc(
l_tilec->numresolutions,
sizeof(opj_tcd_resolution_t));
Expand Down Expand Up @@ -98,9 +99,9 @@ void free_tilec(opj_tcd_tilecomp_t * l_tilec)
void usage(void)
{
printf(
"bench_dwt [-size value] [-check] [-display] [-num_resolutions val]\n");
"bench_dwt [-decode|encode] [-size value] [-check] [-display]\n");
printf(
" [-offset x y] [-num_threads val]\n");
" [-num_resolutions val] [-offset x y] [-num_threads val]\n");
exit(1);
}

Expand Down Expand Up @@ -131,6 +132,17 @@ OPJ_FLOAT64 opj_clock(void)
#endif
}

static OPJ_FLOAT64 opj_wallclock(void)
{
#ifdef _WIN32
return opj_clock();
#else
struct timeval tv;
gettimeofday(&tv, NULL);
return (OPJ_FLOAT64)tv.tv_sec + 1e-6 * (OPJ_FLOAT64)tv.tv_usec;
#endif
}

int main(int argc, char** argv)
{
int num_threads = 0;
Expand All @@ -146,12 +158,18 @@ int main(int argc, char** argv)
OPJ_BOOL check = OPJ_FALSE;
OPJ_INT32 size = 16384 - 1;
OPJ_FLOAT64 start, stop;
OPJ_FLOAT64 start_wc, stop_wc;
OPJ_UINT32 offset_x = ((OPJ_UINT32)size + 1) / 2 - 1;
OPJ_UINT32 offset_y = ((OPJ_UINT32)size + 1) / 2 - 1;
OPJ_UINT32 num_resolutions = 6;
OPJ_BOOL bench_decode = OPJ_TRUE;

for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-display") == 0) {
if (strcmp(argv[i], "-encode") == 0) {
bench_decode = OPJ_FALSE;
} else if (strcmp(argv[i], "-decode") == 0) {
bench_decode = OPJ_TRUE;
} else if (strcmp(argv[i], "-display") == 0) {
display = OPJ_TRUE;
check = OPJ_TRUE;
} else if (strcmp(argv[i], "-check") == 0) {
Expand Down Expand Up @@ -223,13 +241,26 @@ int main(int argc, char** argv)
image_comp.dy = 1;

start = opj_clock();
opj_dwt_decode(&tcd, &tilec, tilec.numresolutions);
start_wc = opj_wallclock();
if (bench_decode) {
opj_dwt_decode(&tcd, &tilec, tilec.numresolutions);
} else {
opj_dwt_encode(&tcd, &tilec);
}
stop = opj_clock();
printf("time for dwt_decode: %.03f s\n", stop - start);
stop_wc = opj_wallclock();
printf("time for %s: total = %.03f s, wallclock = %.03f s\n",
bench_decode ? "dwt_decode" : "dwt_encode",
stop - start,
stop_wc - start_wc);

if (display || check) {
if (display) {
printf("After IDWT\n");
if (bench_decode) {
printf("After IDWT\n");
} else {
printf("After FDWT\n");
}
k = 0;
for (j = 0; j < tilec.y1 - tilec.y0; j++) {
for (i = 0; i < tilec.x1 - tilec.x0; i++) {
Expand All @@ -240,9 +271,18 @@ int main(int argc, char** argv)
}
}

opj_dwt_encode(&tilec);
if (bench_decode) {
opj_dwt_encode(&tcd, &tilec);
} else {
opj_dwt_decode(&tcd, &tilec, tilec.numresolutions);
}

if (display) {
printf("After FDWT\n");
if (bench_decode) {
printf("After FDWT\n");
} else {
printf("After IDWT\n");
}
k = 0;
for (j = 0; j < tilec.y1 - tilec.y0; j++) {
for (i = 0; i < tilec.x1 - tilec.x0; i++) {
Expand Down
Loading

0 comments on commit 0dfc2d7

Please sign in to comment.