From 9b3ef199b27c8bcce21e4b44c0ee061f26e79667 Mon Sep 17 00:00:00 2001 From: Shintaro Iwasaki Date: Mon, 26 Oct 2020 22:14:33 -0500 Subject: [PATCH 1/9] [BOLT] add BOLT repository --- bolt/.github/workflows/CI.yml | 29 + bolt/.gitignore | 42 + bolt/.gitmodules | 3 + bolt/CHANGES.txt | 50 + bolt/CMakeLists.txt | 106 + bolt/CREDITS.txt | 65 + bolt/LICENSE.txt | 397 + bolt/README.md | 263 + bolt/README.rst | 340 + bolt/cmake/DetectTestCompiler/CMakeLists.txt | 48 + bolt/cmake/HandleOpenMPOptions.cmake | 35 + bolt/cmake/OpenMPTesting.cmake | 220 + bolt/cmake/config-ix.cmake | 18 + bolt/docs/ReleaseNotes.rst | 45 + bolt/examples/argobots/.gitignore | 25 + bolt/examples/argobots/Makefile | 46 + bolt/examples/argobots/Makefile.omp | 38 + bolt/examples/argobots/README | 87 + .../argobots/nested_parallel_for_abt_task.c | 224 + .../argobots/nested_parallel_for_abt_thread.c | 228 + .../nested_parallel_for_block_abt_task.c | 239 + .../nested_parallel_for_block_abt_thread.c | 241 + .../argobots/nested_parallel_for_block_omp.c | 105 + .../nested_parallel_for_irregular_abt_task.c | 245 + ...nested_parallel_for_irregular_abt_thread.c | 245 + .../nested_parallel_for_irregular_omp.c | 66 + .../argobots/nested_parallel_for_omp.c | 83 + .../examples/argobots/parallel_for_abt_task.c | 142 + .../argobots/parallel_for_abt_thread.c | 144 + bolt/examples/argobots/parallel_for_omp.c | 78 + .../task_multiple_producer_abt_task.c | 163 + .../task_multiple_producer_abt_thread.c | 163 + .../argobots/task_multiple_producer_omp.c | 90 + bolt/examples/argobots/task_nested_abt_task.c | 137 + .../argobots/task_nested_lvl2_abt_task.c | 154 + bolt/examples/argobots/task_nested_lvl2_omp.c | 121 + bolt/examples/argobots/task_nested_omp.c | 108 + .../argobots/task_single_producer_abt_task.c | 119 + .../task_single_producer_abt_thread.c | 118 + .../argobots/task_single_producer_omp.c | 90 + bolt/examples/argobots/taskwait_omp.c | 117 + bolt/examples/argobots/taskyield_omp.c | 115 + 
bolt/examples/sample_nested.c | 59 + bolt/examples/sample_task_multiple_producer.c | 55 + bolt/examples/sample_task_single_producer.c | 58 + bolt/external/CMakeLists.txt | 87 + .../argobots/.github/workflows/CI.yml | 42 + bolt/external/argobots/.gitignore | 60 + bolt/external/argobots/CHANGES | 58 + bolt/external/argobots/COPYRIGHT | 45 + bolt/external/argobots/Doxyfile.in | 1782 ++++ bolt/external/argobots/Makefile.am | 30 + bolt/external/argobots/README.envvar | 176 + bolt/external/argobots/README.md | 219 + bolt/external/argobots/autogen.sh | 177 + bolt/external/argobots/configure.ac | 900 ++ .../argobots/doc/coding-standards.txt | 7 + bolt/external/argobots/doc/img/es_states.png | Bin 0 -> 160258 bytes .../argobots/doc/img/tasklet_states.png | Bin 0 -> 65473 bytes bolt/external/argobots/doc/img/ult_states.png | Bin 0 -> 106245 bytes bolt/external/argobots/examples/.gitignore | 13 + bolt/external/argobots/examples/Makefile.am | 7 + bolt/external/argobots/examples/Makefile.mk | 15 + .../argobots/examples/fibonacci/.gitignore | 1 + .../argobots/examples/fibonacci/Makefile.am | 14 + .../argobots/examples/fibonacci/fibonacci.c | 152 + .../argobots/examples/hello_world/.gitignore | 2 + .../argobots/examples/hello_world/Makefile.am | 16 + .../examples/hello_world/hello_world.c | 110 + .../examples/hello_world/hello_world_ws.c | 129 + .../argobots/examples/profiling/.gitignore | 2 + .../argobots/examples/profiling/Makefile.am | 16 + .../argobots/examples/profiling/abtx_prof.h | 2841 ++++++ .../examples/profiling/async_engine.c | 361 + .../argobots/examples/profiling/daxpy.c | 234 + .../argobots/examples/scheduling/.gitignore | 6 + .../argobots/examples/scheduling/Makefile.am | 22 + .../examples/scheduling/sched_and_pool_user.c | 643 ++ .../examples/scheduling/sched_predef.c | 73 + .../examples/scheduling/sched_shared_pool.c | 67 + .../examples/scheduling/sched_stack.c | 86 + .../argobots/examples/scheduling/sched_user.c | 228 + .../argobots/examples/stencil/.gitignore 
| 15 + .../argobots/examples/stencil/Makefile.am | 40 + .../examples/stencil/stencil_barrier.c | 180 + .../examples/stencil/stencil_depend_future.c | 265 + .../stencil/stencil_depend_mutex_cond.c | 259 + .../examples/stencil/stencil_depend_yield.c | 248 + .../examples/stencil/stencil_forkjoin.c | 158 + .../stencil/stencil_forkjoin_divconq.c | 223 + .../stencil/stencil_forkjoin_divconq_hrws.c | 339 + .../stencil/stencil_forkjoin_divconq_rws.c | 231 + .../stencil/stencil_forkjoin_revive.c | 172 + .../examples/stencil/stencil_forkjoin_task.c | 159 + .../stencil/stencil_forkjoin_task_revive.c | 174 + .../examples/stencil/stencil_forkjoin_ws.c | 174 + .../examples/stencil/stencil_helper.h | 169 + .../argobots/examples/stencil/stencil_naive.c | 80 + .../argobots/examples/stencil/stencil_seq.c | 111 + bolt/external/argobots/m4/aclocal.m4 | 8 + bolt/external/argobots/m4/aclocal_cc.m4 | 1671 ++++ .../argobots/m4/aclocal_check_visibility.m4 | 123 + bolt/external/argobots/m4/aclocal_runlog.m4 | 218 + bolt/external/argobots/m4/aclocal_util.m4 | 216 + bolt/external/argobots/m4/ax_gcc_builtin.m4 | 176 + .../argobots/m4/ax_gcc_func_attribute.m4 | 242 + bolt/external/argobots/m4/config.rpath | 697 ++ bolt/external/argobots/maint/Version.base.m4 | 27 + bolt/external/argobots/maint/argobots.pc.in | 13 + bolt/external/argobots/maint/code-cleanup.sh | 112 + bolt/external/argobots/maint/hook/pre-commit | 76 + bolt/external/argobots/maint/release.pl | 233 + bolt/external/argobots/maint/template.c | 26 + bolt/external/argobots/maint/version.m4 | 26 + bolt/external/argobots/src/Makefile.am | 45 + bolt/external/argobots/src/arch/Makefile.mk | 19 + .../argobots/src/arch/abtd_affinity.c | 415 + .../argobots/src/arch/abtd_affinity_parser.c | 380 + bolt/external/argobots/src/arch/abtd_env.c | 322 + bolt/external/argobots/src/arch/abtd_stream.c | 110 + bolt/external/argobots/src/arch/abtd_time.c | 50 + .../external/argobots/src/arch/abtd_ythread.c | 156 + .../src/arch/fcontext/LICENSE_1_0.txt | 
23 + .../arch/fcontext/jump_arm64_aapcs_elf_gas.S | 180 + .../fcontext/jump_arm64_aapcs_macho_gas.S | 111 + .../arch/fcontext/jump_i386_sysv_elf_gas.S | 85 + .../arch/fcontext/jump_i386_sysv_macho_gas.S | 80 + .../arch/fcontext/jump_ppc32_sysv_elf_gas.S | 204 + .../arch/fcontext/jump_ppc32_sysv_macho_gas.S | 199 + .../arch/fcontext/jump_ppc64_sysv_elf_gas.S | 573 ++ .../arch/fcontext/jump_ppc64_sysv_macho_gas.S | 222 + .../arch/fcontext/jump_x86_64_sysv_elf_gas.S | 141 + .../fcontext/jump_x86_64_sysv_macho_gas.S | 89 + .../arch/fcontext/make_arm64_aapcs_elf_gas.S | 86 + .../fcontext/make_arm64_aapcs_macho_gas.S | 84 + .../arch/fcontext/make_i386_sysv_elf_gas.S | 77 + .../arch/fcontext/make_i386_sysv_macho_gas.S | 66 + .../arch/fcontext/make_ppc32_sysv_elf_gas.S | 123 + .../arch/fcontext/make_ppc32_sysv_macho_gas.S | 118 + .../arch/fcontext/make_ppc64_sysv_elf_gas.S | 245 + .../arch/fcontext/make_ppc64_sysv_macho_gas.S | 140 + .../arch/fcontext/make_x86_64_sysv_elf_gas.S | 79 + .../fcontext/make_x86_64_sysv_macho_gas.S | 71 + .../arch/fcontext/take_arm64_aapcs_elf_gas.S | 95 + .../fcontext/take_arm64_aapcs_macho_gas.S | 84 + .../arch/fcontext/take_i386_sysv_elf_gas.S | 66 + .../arch/fcontext/take_i386_sysv_macho_gas.S | 61 + .../arch/fcontext/take_ppc32_sysv_elf_gas.S | 204 + .../arch/fcontext/take_ppc32_sysv_macho_gas.S | 199 + .../arch/fcontext/take_ppc64_sysv_elf_gas.S | 274 + .../arch/fcontext/take_ppc64_sysv_macho_gas.S | 222 + .../arch/fcontext/take_x86_64_sysv_elf_gas.S | 78 + .../fcontext/take_x86_64_sysv_macho_gas.S | 69 + bolt/external/argobots/src/barrier.c | 249 + bolt/external/argobots/src/cond.c | 306 + bolt/external/argobots/src/error.c | 90 + bolt/external/argobots/src/eventual.c | 292 + bolt/external/argobots/src/futures.c | 308 + bolt/external/argobots/src/global.c | 255 + .../external/argobots/src/include/Makefile.mk | 45 + bolt/external/argobots/src/include/abt.h.in | 825 ++ bolt/external/argobots/src/include/abtd.h | 104 + 
.../argobots/src/include/abtd_atomic.h | 1243 +++ .../argobots/src/include/abtd_context.h | 84 + .../argobots/src/include/abtd_fcontext.h | 57 + .../argobots/src/include/abtd_stream.h | 30 + .../argobots/src/include/abtd_ucontext.h | 78 + .../argobots/src/include/abtd_ythread.h | 126 + bolt/external/argobots/src/include/abti.h | 604 ++ .../argobots/src/include/abti_barrier.h | 42 + .../external/argobots/src/include/abti_cond.h | 182 + .../argobots/src/include/abti_config.h | 43 + .../argobots/src/include/abti_error.h | 328 + .../argobots/src/include/abti_eventual.h | 41 + .../argobots/src/include/abti_future.h | 41 + bolt/external/argobots/src/include/abti_key.h | 305 + .../argobots/src/include/abti_local.h | 91 + bolt/external/argobots/src/include/abti_log.h | 34 + bolt/external/argobots/src/include/abti_mem.h | 349 + .../argobots/src/include/abti_mem_pool.h | 195 + .../argobots/src/include/abti_mutex.h | 160 + .../argobots/src/include/abti_mutex_attr.h | 41 + .../external/argobots/src/include/abti_pool.h | 147 + .../argobots/src/include/abti_rwlock.h | 44 + .../argobots/src/include/abti_sched.h | 81 + .../external/argobots/src/include/abti_self.h | 34 + .../argobots/src/include/abti_spinlock.h | 36 + .../argobots/src/include/abti_stream.h | 72 + .../src/include/abti_stream_barrier.h | 43 + .../argobots/src/include/abti_sync_lifo.h | 145 + .../argobots/src/include/abti_thread.h | 79 + .../argobots/src/include/abti_thread_attr.h | 57 + .../argobots/src/include/abti_timer.h | 48 + .../external/argobots/src/include/abti_tool.h | 390 + .../argobots/src/include/abti_valgrind.h | 41 + .../argobots/src/include/abti_ythread.h | 373 + .../src/include/abti_ythread_htable.h | 177 + bolt/external/argobots/src/include/abtu.h | 220 + .../argobots/src/include/asm/Makefile.mk | 9 + .../src/include/asm/abtd_asm_int128_cas.h | 129 + bolt/external/argobots/src/info.c | 816 ++ bolt/external/argobots/src/key.c | 167 + bolt/external/argobots/src/local.c | 29 + 
bolt/external/argobots/src/log.c | 196 + bolt/external/argobots/src/mem/Makefile.mk | 10 + bolt/external/argobots/src/mem/malloc.c | 176 + bolt/external/argobots/src/mem/mem_pool.c | 283 + bolt/external/argobots/src/mem/valgrind.c | 110 + bolt/external/argobots/src/mutex.c | 781 ++ bolt/external/argobots/src/mutex_attr.c | 95 + bolt/external/argobots/src/pool/Makefile.mk | 10 + bolt/external/argobots/src/pool/fifo.c | 499 + bolt/external/argobots/src/pool/fifo_wait.c | 395 + bolt/external/argobots/src/pool/pool.c | 527 + bolt/external/argobots/src/rwlock.c | 173 + bolt/external/argobots/src/sched/Makefile.mk | 13 + bolt/external/argobots/src/sched/basic.c | 193 + bolt/external/argobots/src/sched/basic_wait.c | 208 + bolt/external/argobots/src/sched/config.c | 310 + bolt/external/argobots/src/sched/prio.c | 137 + bolt/external/argobots/src/sched/randws.c | 143 + bolt/external/argobots/src/sched/sched.c | 825 ++ bolt/external/argobots/src/self.c | 233 + bolt/external/argobots/src/stream.c | 1548 +++ bolt/external/argobots/src/stream_barrier.c | 106 + bolt/external/argobots/src/task.c | 496 + bolt/external/argobots/src/thread.c | 2180 +++++ bolt/external/argobots/src/thread_attr.c | 310 + bolt/external/argobots/src/timer.c | 286 + bolt/external/argobots/src/tool.c | 334 + bolt/external/argobots/src/unit.c | 55 + bolt/external/argobots/src/util/Makefile.mk | 7 + bolt/external/argobots/src/util/largepage.c | 145 + bolt/external/argobots/src/ythread.c | 134 + bolt/external/argobots/src/ythread_htable.c | 218 + bolt/external/argobots/test/.gitignore | 89 + bolt/external/argobots/test/Makefile.am | 8 + bolt/external/argobots/test/Makefile.mk | 20 + bolt/external/argobots/test/basic/Makefile.am | 202 + bolt/external/argobots/test/basic/barrier.c | 165 + bolt/external/argobots/test/basic/cond_join.c | 208 + .../argobots/test/basic/cond_signal_in_main.c | 80 + bolt/external/argobots/test/basic/cond_test.c | 226 + .../argobots/test/basic/cond_timedwait.c | 172 + 
.../argobots/test/basic/eventual_create.c | 112 + .../argobots/test/basic/eventual_test.c | 285 + .../external/argobots/test/basic/ext_thread.c | 240 + .../argobots/test/basic/ext_thread2.c | 105 + .../argobots/test/basic/future_create.c | 140 + .../external/argobots/test/basic/info_print.c | 146 + .../argobots/test/basic/info_stackdump.c | 210 + .../argobots/test/basic/info_stackdump2.c | 226 + .../argobots/test/basic/init_finalize.c | 76 + .../external/argobots/test/basic/main_sched.c | 178 + bolt/external/argobots/test/basic/mutex.c | 159 + .../external/argobots/test/basic/mutex_prio.c | 169 + .../argobots/test/basic/mutex_recursive.c | 155 + .../argobots/test/basic/mutex_spinlock.c | 160 + .../argobots/test/basic/mutex_unlock_se.c | 159 + .../argobots/test/basic/rwlock_reader_incl.c | 186 + .../test/basic/rwlock_reader_writer_excl.c | 202 + .../argobots/test/basic/rwlock_writer_excl.c | 195 + .../argobots/test/basic/sched_basic.c | 105 + .../argobots/test/basic/sched_basic_wait.c | 99 + .../argobots/test/basic/sched_config.c | 63 + .../argobots/test/basic/sched_on_thread.c | 137 + .../external/argobots/test/basic/sched_prio.c | 293 + .../argobots/test/basic/sched_randws.c | 198 + .../argobots/test/basic/sched_set_main.c | 146 + .../argobots/test/basic/sched_stack.c | 106 + .../argobots/test/basic/sched_user_ws.c | 323 + bolt/external/argobots/test/basic/self_type.c | 215 + .../argobots/test/basic/task_create.c | 133 + .../test/basic/task_create_on_xstream.c | 125 + bolt/external/argobots/test/basic/task_data.c | 202 + .../external/argobots/test/basic/task_data2.c | 136 + .../argobots/test/basic/task_revive.c | 172 + .../argobots/test/basic/thread_attr.c | 115 + .../argobots/test/basic/thread_create.c | 86 + .../argobots/test/basic/thread_create2.c | 108 + .../test/basic/thread_create_on_xstream.c | 104 + .../argobots/test/basic/thread_data.c | 209 + .../argobots/test/basic/thread_data2.c | 177 + .../test/basic/thread_get_last_xstream.c | 140 + 
bolt/external/argobots/test/basic/thread_id.c | 89 + .../argobots/test/basic/thread_migrate.c | 120 + .../argobots/test/basic/thread_revive.c | 173 + .../test/basic/thread_self_suspend_resume.c | 198 + .../argobots/test/basic/thread_task.c | 295 + .../argobots/test/basic/thread_task_arg.c | 131 + .../argobots/test/basic/thread_task_num.c | 104 + .../argobots/test/basic/thread_yield.c | 89 + .../argobots/test/basic/thread_yield_to.c | 171 + bolt/external/argobots/test/basic/timer.c | 148 + .../argobots/test/basic/xstream_affinity.c | 188 + .../argobots/test/basic/xstream_barrier.c | 115 + .../argobots/test/basic/xstream_create.c | 63 + .../argobots/test/basic/xstream_rank.c | 73 + .../argobots/test/basic/xstream_revive.c | 94 + .../argobots/test/benchmark/Makefile.am | 118 + .../argobots/test/benchmark/bench_util.h | 193 + .../argobots/test/benchmark/init_finalize.c | 103 + .../argobots/test/benchmark/sync_ops.c | 265 + .../argobots/test/benchmark/task_fork_join.c | 224 + .../argobots/test/benchmark/task_ops.c | 283 + .../argobots/test/benchmark/task_ops_all.c | 190 + .../test/benchmark/thread_fork_join.c | 261 + .../argobots/test/benchmark/thread_many_ops.c | 356 + .../argobots/test/benchmark/thread_ops.c | 505 + .../argobots/test/benchmark/thread_ops_all.c | 457 + .../argobots/test/benchmark/xstream_ops.c | 168 + .../argobots/test/scripts/create_junit.pl | 152 + bolt/external/argobots/test/util/Makefile.am | 11 + bolt/external/argobots/test/util/abttest.c | 507 + bolt/external/argobots/test/util/abttest.h | 194 + bolt/libomptarget/CMakeLists.txt | 85 + bolt/libomptarget/README.txt | 73 + .../Modules/LibomptargetGetDependencies.cmake | 248 + .../LibomptargetNVPTXBitcodeLibrary.cmake | 111 + .../cmake/Modules/LibomptargetUtils.cmake | 27 + bolt/libomptarget/deviceRTLs/CMakeLists.txt | 13 + .../deviceRTLs/amdgcn/CMakeLists.txt | 153 + .../deviceRTLs/amdgcn/src/amdgcn_interface.h | 18 + .../deviceRTLs/amdgcn/src/amdgcn_locks.hip | 28 + 
.../deviceRTLs/amdgcn/src/amdgcn_smid.hip | 61 + .../deviceRTLs/amdgcn/src/hip_atomics.h | 41 + .../deviceRTLs/amdgcn/src/target_impl.h | 161 + .../deviceRTLs/amdgcn/src/target_impl.hip | 72 + bolt/libomptarget/deviceRTLs/common/debug.h | 287 + .../deviceRTLs/common/device_environment.h | 24 + .../deviceRTLs/common/omptarget.h | 374 + .../deviceRTLs/common/omptargeti.h | 228 + .../deviceRTLs/common/src/cancel.cu | 28 + .../deviceRTLs/common/src/critical.cu | 28 + .../deviceRTLs/common/src/data_sharing.cu | 277 + .../deviceRTLs/common/src/libcall.cu | 364 + .../deviceRTLs/common/src/loop.cu | 756 ++ .../deviceRTLs/common/src/omp_data.cu | 68 + .../deviceRTLs/common/src/omptarget.cu | 168 + .../deviceRTLs/common/src/parallel.cu | 302 + .../deviceRTLs/common/src/reduction.cu | 314 + .../deviceRTLs/common/src/support.cu | 266 + .../deviceRTLs/common/src/sync.cu | 138 + .../deviceRTLs/common/src/task.cu | 216 + .../deviceRTLs/common/state-queue.h | 51 + .../deviceRTLs/common/state-queuei.h | 90 + bolt/libomptarget/deviceRTLs/common/support.h | 98 + .../deviceRTLs/common/target_atomic.h | 38 + bolt/libomptarget/deviceRTLs/interface.h | 462 + .../deviceRTLs/nvptx/CMakeLists.txt | 212 + .../deviceRTLs/nvptx/docs/ReductionDesign.txt | 523 + .../deviceRTLs/nvptx/src/nvptx_interface.h | 18 + .../deviceRTLs/nvptx/src/target_impl.cu | 50 + .../deviceRTLs/nvptx/src/target_impl.h | 209 + .../deviceRTLs/nvptx/test/CMakeLists.txt | 25 + .../nvptx/test/api/get_max_threads.c | 22 + .../deviceRTLs/nvptx/test/api/ignored.c | 38 + .../deviceRTLs/nvptx/test/api/max_threads.c | 53 + .../deviceRTLs/nvptx/test/api/thread_limit.c | 72 + .../nvptx/test/data_sharing/alignment.c | 55 + .../deviceRTLs/nvptx/test/lit.cfg | 76 + .../deviceRTLs/nvptx/test/lit.site.cfg.in | 15 + .../deviceRTLs/nvptx/test/parallel/barrier.c | 37 + .../deviceRTLs/nvptx/test/parallel/flush.c | 35 + .../deviceRTLs/nvptx/test/parallel/level.c | 151 + .../deviceRTLs/nvptx/test/parallel/nested.c | 136 + 
.../nvptx/test/parallel/num_threads.c | 102 + .../test/parallel/spmd_parallel_regions.cpp | 51 + .../nvptx/test/parallel/thread_limit.c | 77 + .../nvptx/test/parallel/tripcount.c | 22 + bolt/libomptarget/include/omptarget.h | 261 + bolt/libomptarget/include/omptargetplugin.h | 140 + bolt/libomptarget/plugins/CMakeLists.txt | 91 + .../plugins/aarch64/CMakeLists.txt | 17 + bolt/libomptarget/plugins/common/elf_common.c | 111 + bolt/libomptarget/plugins/cuda/CMakeLists.txt | 58 + bolt/libomptarget/plugins/cuda/src/rtl.cpp | 1172 +++ bolt/libomptarget/plugins/exports | 24 + .../plugins/generic-elf-64bit/src/rtl.cpp | 343 + .../libomptarget/plugins/ppc64/CMakeLists.txt | 17 + .../plugins/ppc64le/CMakeLists.txt | 17 + bolt/libomptarget/plugins/ve/CMakeLists.txt | 59 + bolt/libomptarget/plugins/ve/src/rtl.cpp | 464 + .../plugins/x86_64/CMakeLists.txt | 17 + bolt/libomptarget/src/CMakeLists.txt | 44 + bolt/libomptarget/src/api.cpp | 299 + bolt/libomptarget/src/device.cpp | 444 + bolt/libomptarget/src/device.h | 226 + bolt/libomptarget/src/exports | 31 + bolt/libomptarget/src/interface.cpp | 350 + bolt/libomptarget/src/omptarget.cpp | 824 ++ bolt/libomptarget/src/private.h | 108 + bolt/libomptarget/src/rtl.cpp | 441 + bolt/libomptarget/src/rtl.h | 195 + bolt/libomptarget/test/CMakeLists.txt | 19 + .../test/api/omp_get_num_devices.c | 36 + .../omp_get_num_devices_with_empty_target.c | 30 + bolt/libomptarget/test/env/omp_target_debug.c | 20 + bolt/libomptarget/test/lit.cfg | 155 + bolt/libomptarget/test/lit.site.cfg.in | 20 + bolt/libomptarget/test/mapping/alloc_fail.c | 26 + .../test/mapping/declare_mapper_api.cpp | 47 + .../test/mapping/delete_inf_refcount.c | 32 + bolt/libomptarget/test/mapping/pr38704.c | 47 + .../libomptarget/test/offloading/d2d_memcpy.c | 69 + .../test/offloading/dynamic_module.c | 17 + .../test/offloading/dynamic_module_load.c | 37 + .../test/offloading/looptripcnt.c | 36 + .../test/offloading/offloading_success.c | 23 + 
.../test/offloading/offloading_success.cpp | 23 + .../test/offloading/parallel_offloading_map.c | 41 + bolt/libomptarget/test/offloading/requires.c | 46 + .../test/offloading/target_depend_nowait.cpp | 62 + .../test/unified_shared_memory/api.c | 164 + .../unified_shared_memory/close_enter_exit.c | 95 + .../test/unified_shared_memory/close_manual.c | 86 + .../unified_shared_memory/close_modifier.c | 135 + .../unified_shared_memory/shared_update.c | 114 + bolt/maint/bolt-release.pl | 155 + bolt/maint/update-llvmomp.sh | 69 + bolt/runtime/.clang-format | 5 + bolt/runtime/CMakeLists.txt | 442 + bolt/runtime/README.txt | 117 + .../cmake/LibboltSymlinkArgobots.cmake | 13 + .../cmake/LibompCheckFortranFlag.cmake | 72 + .../runtime/cmake/LibompCheckLinkerFlag.cmake | 68 + bolt/runtime/cmake/LibompDefinitions.cmake | 30 + bolt/runtime/cmake/LibompExports.cmake | 94 + .../runtime/cmake/LibompGetArchitecture.cmake | 71 + bolt/runtime/cmake/LibompHandleFlags.cmake | 173 + bolt/runtime/cmake/LibompMicroTests.cmake | 241 + bolt/runtime/cmake/LibompUtils.cmake | 196 + bolt/runtime/cmake/config-ix.cmake | 302 + bolt/runtime/doc/Reference.pdf | Bin 0 -> 507616 bytes bolt/runtime/doc/doxygen/config | 1822 ++++ bolt/runtime/doc/doxygen/header.tex | 77 + bolt/runtime/doc/doxygen/libomp_interface.h | 331 + bolt/runtime/src/CMakeLists.txt | 374 + bolt/runtime/src/dllexports | 1196 +++ bolt/runtime/src/exports_so.txt | 125 + bolt/runtime/src/extractExternal.cpp | 483 + bolt/runtime/src/i18n/en_US.txt | 492 + bolt/runtime/src/include/omp-tools.h.var | 1082 ++ bolt/runtime/src/include/omp.h.var | 414 + bolt/runtime/src/include/omp_lib.f.var | 1056 ++ bolt/runtime/src/include/omp_lib.f90.var | 695 ++ bolt/runtime/src/include/omp_lib.h.var | 932 ++ bolt/runtime/src/kmp.h | 4012 ++++++++ bolt/runtime/src/kmp_abt.h | 96 + bolt/runtime/src/kmp_abt_affinity.cpp | 869 ++ bolt/runtime/src/kmp_affinity.cpp | 5344 ++++++++++ bolt/runtime/src/kmp_affinity.h | 844 ++ bolt/runtime/src/kmp_alloc.cpp | 
2064 ++++ bolt/runtime/src/kmp_atomic.cpp | 3623 +++++++ bolt/runtime/src/kmp_atomic.h | 1767 ++++ bolt/runtime/src/kmp_barrier.cpp | 2234 +++++ bolt/runtime/src/kmp_cancel.cpp | 331 + bolt/runtime/src/kmp_config.h.cmake | 121 + bolt/runtime/src/kmp_csupport.cpp | 4240 ++++++++ bolt/runtime/src/kmp_debug.cpp | 131 + bolt/runtime/src/kmp_debug.h | 179 + bolt/runtime/src/kmp_debugger.cpp | 286 + bolt/runtime/src/kmp_debugger.h | 48 + bolt/runtime/src/kmp_dispatch.cpp | 2626 +++++ bolt/runtime/src/kmp_dispatch.h | 506 + bolt/runtime/src/kmp_dispatch_hier.h | 1106 +++ bolt/runtime/src/kmp_environment.cpp | 500 + bolt/runtime/src/kmp_environment.h | 77 + bolt/runtime/src/kmp_error.cpp | 448 + bolt/runtime/src/kmp_error.h | 60 + bolt/runtime/src/kmp_ftn_cdecl.cpp | 34 + bolt/runtime/src/kmp_ftn_entry.h | 1492 +++ bolt/runtime/src/kmp_ftn_extra.cpp | 32 + bolt/runtime/src/kmp_ftn_os.h | 683 ++ bolt/runtime/src/kmp_ftn_stdcall.cpp | 32 + bolt/runtime/src/kmp_global.cpp | 556 ++ bolt/runtime/src/kmp_gsupport.cpp | 2041 ++++ bolt/runtime/src/kmp_i18n.cpp | 871 ++ bolt/runtime/src/kmp_i18n.h | 178 + bolt/runtime/src/kmp_import.cpp | 33 + bolt/runtime/src/kmp_io.cpp | 229 + bolt/runtime/src/kmp_io.h | 38 + bolt/runtime/src/kmp_itt.cpp | 160 + bolt/runtime/src/kmp_itt.h | 332 + bolt/runtime/src/kmp_itt.inl | 1042 ++ bolt/runtime/src/kmp_lock.cpp | 4181 ++++++++ bolt/runtime/src/kmp_lock.h | 1530 +++ bolt/runtime/src/kmp_omp.h | 235 + bolt/runtime/src/kmp_os.h | 1042 ++ bolt/runtime/src/kmp_platform.h | 210 + bolt/runtime/src/kmp_runtime.cpp | 8661 +++++++++++++++++ bolt/runtime/src/kmp_safe_c_api.h | 74 + bolt/runtime/src/kmp_sched.cpp | 1004 ++ bolt/runtime/src/kmp_settings.cpp | 5787 +++++++++++ bolt/runtime/src/kmp_settings.h | 67 + bolt/runtime/src/kmp_stats.cpp | 922 ++ bolt/runtime/src/kmp_stats.h | 1010 ++ bolt/runtime/src/kmp_stats_timing.cpp | 130 + bolt/runtime/src/kmp_stats_timing.h | 115 + bolt/runtime/src/kmp_str.cpp | 751 ++ bolt/runtime/src/kmp_str.h | 125 + 
bolt/runtime/src/kmp_stub.cpp | 385 + bolt/runtime/src/kmp_stub.h | 55 + bolt/runtime/src/kmp_taskdeps.cpp | 813 ++ bolt/runtime/src/kmp_taskdeps.h | 145 + bolt/runtime/src/kmp_tasking.cpp | 4678 +++++++++ bolt/runtime/src/kmp_threadprivate.cpp | 799 ++ bolt/runtime/src/kmp_utility.cpp | 409 + bolt/runtime/src/kmp_version.cpp | 198 + bolt/runtime/src/kmp_version.h | 66 + bolt/runtime/src/kmp_wait_release.cpp | 29 + bolt/runtime/src/kmp_wait_release.h | 939 ++ bolt/runtime/src/kmp_wrapper_getpid.h | 82 + bolt/runtime/src/kmp_wrapper_malloc.h | 196 + bolt/runtime/src/libomp.rc.var | 69 + bolt/runtime/src/ompt-event-specific.h | 106 + bolt/runtime/src/ompt-general.cpp | 745 ++ bolt/runtime/src/ompt-internal.h | 125 + bolt/runtime/src/ompt-specific.cpp | 504 + bolt/runtime/src/ompt-specific.h | 131 + bolt/runtime/src/test-touch.c | 30 + .../thirdparty/ittnotify/disable_warnings.h | 29 + .../src/thirdparty/ittnotify/ittnotify.h | 4075 ++++++++ .../thirdparty/ittnotify/ittnotify_config.h | 595 ++ .../thirdparty/ittnotify/ittnotify_static.cpp | 1201 +++ .../thirdparty/ittnotify/ittnotify_static.h | 341 + .../thirdparty/ittnotify/ittnotify_types.h | 67 + .../thirdparty/ittnotify/legacy/ittnotify.h | 991 ++ bolt/runtime/src/tsan_annotations.cpp | 107 + bolt/runtime/src/tsan_annotations.h | 169 + bolt/runtime/src/z_Linux_asm.S | 1770 ++++ bolt/runtime/src/z_Linux_util.cpp | 4277 ++++++++ bolt/runtime/src/z_Windows_NT-586_asm.asm | 1298 +++ bolt/runtime/src/z_Windows_NT-586_util.cpp | 135 + bolt/runtime/src/z_Windows_NT_util.cpp | 1621 +++ bolt/runtime/test/CMakeLists.txt | 53 + bolt/runtime/test/affinity/bug-nested.c | 32 + .../test/affinity/format/affinity_display.1.c | 93 + .../test/affinity/format/affinity_values.c | 135 + bolt/runtime/test/affinity/format/api.c | 57 + bolt/runtime/test/affinity/format/api2.c | 85 + bolt/runtime/test/affinity/format/check.py | 73 + .../test/affinity/format/fields_modifiers.c | 118 + .../test/affinity/format/fields_values.c | 153 + 
bolt/runtime/test/affinity/format/increase.c | 37 + .../test/affinity/format/lit.local.cfg | 0 bolt/runtime/test/affinity/format/nested.c | 23 + bolt/runtime/test/affinity/format/nested2.c | 30 + .../test/affinity/format/nested_mixed.c | 47 + .../test/affinity/format/nested_serial.c | 36 + bolt/runtime/test/affinity/format/proc_bind.c | 31 + bolt/runtime/test/affinity/format/simple.c | 28 + .../runtime/test/affinity/format/simple_env.c | 17 + bolt/runtime/test/api/has_openmp.c | 23 + bolt/runtime/test/api/kmp_aligned_malloc.c | 62 + .../test/api/kmp_set_defaults_lock_bug.c | 53 + bolt/runtime/test/api/omp_alloc_def_fb.c | 32 + bolt/runtime/test/api/omp_alloc_hbw.c | 45 + bolt/runtime/test/api/omp_alloc_null_fb.c | 35 + bolt/runtime/test/api/omp_display_env0.c | 14 + bolt/runtime/test/api/omp_get_num_devices.c | 25 + bolt/runtime/test/api/omp_get_num_threads.c | 39 + bolt/runtime/test/api/omp_get_wtick.c | 24 + bolt/runtime/test/api/omp_get_wtime.c | 33 + bolt/runtime/test/api/omp_in_parallel.c | 44 + bolt/runtime/test/api/omp_pause_resource.c | 58 + bolt/runtime/test/atomic/omp_atomic.c | 366 + bolt/runtime/test/barrier/omp_barrier.c | 44 + .../test/bolt/interop/init_then_openmp.c | 37 + .../test/bolt/interop/openmp_then_init.c | 37 + .../test/bolt/misc_bugs/untied_tasks.c | 61 + .../bolt/scheduling/bolt_scheduling_util.h | 77 + .../bolt/scheduling/for_nowait_scheduling.c | 47 + .../bolt/scheduling/task_tied_scheduling.c | 49 + .../scheduling/task_tied_thread_scheduling.c | 76 + .../bolt/scheduling/task_unitied_scheduling.c | 69 + .../task_untied_thread_scheduling.c | 74 + .../taskdep_taskgroup_tied_scheduling.c | 123 + .../taskdep_taskgroup_untied_scheduling.c | 123 + ...askdep_taskgroup_untied_yield_scheduling.c | 127 + .../taskdep_taskwait_tied_scheduling.c | 121 + .../taskdep_taskwait_untied_scheduling.c | 121 + ...taskdep_taskwait_untied_yield_scheduling.c | 125 + .../bolt/scheduling/taskdep_tied_scheduling.c | 122 + .../scheduling/taskdep_untied_scheduling.c 
| 122 + .../taskdep_untied_yield_scheduling.c | 126 + .../taskloop_nogroup_tied_scheduling.c | 48 + .../taskloop_nogroup_untied_scheduling.c | 70 + .../scheduling/taskloop_tied_scheduling.c | 47 + .../scheduling/taskloop_untied_scheduling.c | 68 + .../test/bolt/scheduling/thread_scheduling.c | 46 + .../scheduling/thread_thread_scheduling.c | 58 + .../bolt/threadid/task_tied_thread_threadid.c | 103 + .../test/bolt/threadid/task_tied_threadid.c | 67 + .../threadid/task_unitied_thread_threadid.c | 98 + .../test/bolt/threadid/task_untied_threadid.c | 67 + .../bolt/threadid/task_untied_threadid2.c | 65 + .../bolt/threadid/taskdep_tied_threadid.c | 142 + .../bolt/threadid/taskdep_untied_threadid.c | 142 + .../bolt/threadid/taskdep_untied_threadid2.c | 138 + .../bolt/threadid/taskloop_tied_threadid.c | 67 + .../bolt/threadid/taskloop_untied_threadid.c | 70 + .../bolt/threadid/thread_thread_threadid.c | 95 + .../test/bolt/threadid/thread_threadid.c | 61 + bolt/runtime/test/critical/omp_critical.c | 37 + bolt/runtime/test/env/kmp_aff_disable_hwloc.c | 21 + bolt/runtime/test/env/kmp_set_dispatch_buf.c | 82 + bolt/runtime/test/env/omp_target_offload.c | 62 + bolt/runtime/test/env/omp_thread_limit.c | 82 + bolt/runtime/test/env/omp_wait_policy.c | 40 + bolt/runtime/test/flush/omp_flush.c | 50 + bolt/runtime/test/lit.cfg | 156 + bolt/runtime/test/lit.site.cfg.in | 22 + bolt/runtime/test/lock/omp_init_lock.c | 43 + bolt/runtime/test/lock/omp_lock.c | 47 + bolt/runtime/test/lock/omp_nest_lock.c | 45 + bolt/runtime/test/lock/omp_test_lock.c | 47 + bolt/runtime/test/lock/omp_test_nest_lock.c | 47 + bolt/runtime/test/master/omp_master.c | 38 + bolt/runtime/test/master/omp_master_3.c | 44 + .../misc_bugs/cancellation_for_sections.c | 63 + .../test/misc_bugs/for-task-for-task.c | 76 + bolt/runtime/test/misc_bugs/for-task-for.c | 69 + .../test/misc_bugs/many-microtask-args.c | 39 + .../misc_bugs/omp_foreign_thread_team_reuse.c | 82 + bolt/runtime/test/misc_bugs/stack-propagate.c | 66 + 
bolt/runtime/test/misc_bugs/teams-no-par.c | 64 + bolt/runtime/test/misc_bugs/teams-reduction.c | 68 + bolt/runtime/test/omp_my_sleep.h | 33 + bolt/runtime/test/omp_testsuite.h | 96 + bolt/runtime/test/ompt/callback.h | 1153 +++ .../test/ompt/cancel/cancel_parallel.c | 40 + .../test/ompt/cancel/cancel_taskgroup.c | 89 + .../test/ompt/cancel/cancel_worksharing.c | 67 + .../loadtool/tool_available/tool_available.c | 74 + .../tool_available_search.c | 104 + .../tool_not_available/tool_not_available.c | 69 + .../ompt/misc/api_calls_from_other_thread.cpp | 92 + bolt/runtime/test/ompt/misc/api_calls_misc.c | 72 + .../runtime/test/ompt/misc/api_calls_places.c | 88 + .../test/ompt/misc/api_calls_without_ompt.c | 148 + bolt/runtime/test/ompt/misc/control_tool.c | 29 + .../ompt/misc/control_tool_no_ompt_support.c | 13 + bolt/runtime/test/ompt/misc/finalize_tool.c | 28 + .../test/ompt/misc/interoperability.cpp | 122 + bolt/runtime/test/ompt/misc/threads.c | 34 + bolt/runtime/test/ompt/misc/threads_nested.c | 40 + bolt/runtime/test/ompt/misc/unset_callback.c | 29 + bolt/runtime/test/ompt/ompt-signal.h | 31 + .../ompt/parallel/dynamic_enough_threads.c | 43 + .../parallel/dynamic_not_enough_threads.c | 43 + .../parallel/max_active_levels_serialized.c | 73 + bolt/runtime/test/ompt/parallel/nested.c | 298 + bolt/runtime/test/ompt/parallel/nested_lwt.c | 334 + .../test/ompt/parallel/nested_serialized.c | 128 + .../test/ompt/parallel/nested_thread_num.c | 357 + .../test/ompt/parallel/nested_threadnum.c | 62 + .../test/ompt/parallel/no_thread_num_clause.c | 96 + bolt/runtime/test/ompt/parallel/normal.c | 135 + .../test/ompt/parallel/not_enough_threads.c | 90 + .../runtime/test/ompt/parallel/parallel_if0.c | 76 + bolt/runtime/test/ompt/parallel/serialized.c | 77 + .../ompt/synchronization/barrier/explicit.c | 58 + .../ompt/synchronization/barrier/for_loop.c | 56 + .../ompt/synchronization/barrier/for_simd.c | 33 + .../barrier/implicit_task_data.c | 154 + 
.../synchronization/barrier/parallel_region.c | 40 + .../ompt/synchronization/barrier/sections.c | 63 + .../ompt/synchronization/barrier/single.c | 61 + .../test/ompt/synchronization/critical.c | 32 + .../runtime/test/ompt/synchronization/flush.c | 30 + bolt/runtime/test/ompt/synchronization/lock.c | 44 + .../test/ompt/synchronization/master.c | 38 + .../test/ompt/synchronization/nest_lock.c | 52 + .../test/ompt/synchronization/ordered.c | 32 + .../synchronization/ordered_dependences.c | 61 + .../synchronization/reduction/empty_reduce.c | 38 + .../synchronization/reduction/tree_reduce.c | 52 + .../test/ompt/synchronization/taskgroup.c | 49 + .../test/ompt/synchronization/taskwait.c | 36 + .../test/ompt/synchronization/test_lock.c | 54 + .../ompt/synchronization/test_nest_lock.c | 42 + .../synchronization/test_nest_lock_parallel.c | 60 + bolt/runtime/test/ompt/tasks/dependences.c | 88 + .../ompt/tasks/dependences_mutexinoutset.c | 121 + bolt/runtime/test/ompt/tasks/explicit_task.c | 102 + bolt/runtime/test/ompt/tasks/serialized.c | 152 + .../test/ompt/tasks/task_early_fulfill.c | 68 + .../test/ompt/tasks/task_in_joinbarrier.c | 91 + .../test/ompt/tasks/task_late_fulfill.c | 83 + bolt/runtime/test/ompt/tasks/task_memory.c | 108 + bolt/runtime/test/ompt/tasks/task_types.c | 220 + .../test/ompt/tasks/task_types_serialized.c | 114 + bolt/runtime/test/ompt/tasks/taskloop.c | 80 + .../runtime/test/ompt/tasks/taskwait-depend.c | 80 + bolt/runtime/test/ompt/tasks/taskyield.c | 62 + bolt/runtime/test/ompt/tasks/untied_task.c | 108 + bolt/runtime/test/ompt/teams/parallel_team.c | 81 + bolt/runtime/test/ompt/teams/serial_teams.c | 89 + bolt/runtime/test/ompt/teams/serialized.c | 62 + bolt/runtime/test/ompt/teams/team.c | 49 + bolt/runtime/test/ompt/worksharing/for/auto.c | 7 + .../ompt/worksharing/for/auto_serialized.c | 7 + .../test/ompt/worksharing/for/auto_split.c | 8 + bolt/runtime/test/ompt/worksharing/for/base.h | 43 + .../ompt/worksharing/for/base_serialized.h | 28 + 
.../test/ompt/worksharing/for/base_split.h | 66 + .../test/ompt/worksharing/for/dynamic.c | 5 + .../ompt/worksharing/for/dynamic_serialized.c | 5 + .../test/ompt/worksharing/for/dynamic_split.c | 7 + .../test/ompt/worksharing/for/guided.c | 5 + .../ompt/worksharing/for/guided_serialized.c | 5 + .../test/ompt/worksharing/for/guided_split.c | 7 + .../test/ompt/worksharing/for/runtime.c | 5 + .../ompt/worksharing/for/runtime_serialized.c | 5 + .../test/ompt/worksharing/for/runtime_split.c | 7 + .../test/ompt/worksharing/for/static.c | 7 + .../ompt/worksharing/for/static_serialized.c | 7 + .../test/ompt/worksharing/for/static_split.c | 8 + bolt/runtime/test/ompt/worksharing/sections.c | 36 + bolt/runtime/test/ompt/worksharing/single.c | 36 + bolt/runtime/test/parallel/omp_nested.c | 49 + .../test/parallel/omp_parallel_copyin.c | 48 + .../test/parallel/omp_parallel_default.c | 43 + .../test/parallel/omp_parallel_firstprivate.c | 46 + bolt/runtime/test/parallel/omp_parallel_if.c | 40 + .../test/parallel/omp_parallel_num_threads.c | 46 + .../test/parallel/omp_parallel_private.c | 46 + .../test/parallel/omp_parallel_reduction.c | 254 + .../test/parallel/omp_parallel_shared.c | 46 + bolt/runtime/test/tasking/bug_36720.c | 36 + .../test/tasking/bug_nested_proxy_task.c | 136 + .../test/tasking/bug_proxy_task_dep_waiting.c | 139 + .../test/tasking/bug_serial_taskgroup.c | 16 + .../test/tasking/kmp_detach_tasks_t1.c | 114 + .../test/tasking/kmp_detach_tasks_t2.c | 117 + .../test/tasking/kmp_detach_tasks_t3.c | 139 + .../kmp_task_modifier_simple_par_new.cpp | 99 + .../kmp_task_modifier_simple_par_old.cpp | 93 + .../kmp_task_modifier_simple_ws_new.cpp | 114 + .../kmp_task_modifier_simple_ws_old.cpp | 108 + .../test/tasking/kmp_task_reduction_nest.cpp | 376 + bolt/runtime/test/tasking/kmp_taskloop.c | 159 + .../test/tasking/nested_parallel_tasking.c | 32 + .../test/tasking/nested_task_creation.c | 35 + .../test/tasking/omp50_task_depend_mtx.c | 153 + 
.../test/tasking/omp50_task_depend_mtx2.c | 156 + .../test/tasking/omp_detach_taskwait.c | 27 + .../runtime/test/tasking/omp_fill_taskqueue.c | 65 + bolt/runtime/test/tasking/omp_task.c | 55 + bolt/runtime/test/tasking/omp_task_depend.c | 91 + .../tasking/omp_task_depend_resize_hashmap.c | 46 + bolt/runtime/test/tasking/omp_task_final.c | 65 + .../test/tasking/omp_task_firstprivate.c | 51 + bolt/runtime/test/tasking/omp_task_if.c | 43 + .../test/tasking/omp_task_imp_firstprivate.c | 47 + .../runtime/test/tasking/omp_task_nest_tied.c | 57 + .../test/tasking/omp_task_nest_untied.c | 58 + bolt/runtime/test/tasking/omp_task_priority.c | 22 + bolt/runtime/test/tasking/omp_task_private.c | 53 + .../test/tasking/omp_task_red_taskloop.c | 66 + bolt/runtime/test/tasking/omp_task_shared.c | 41 + .../test/tasking/omp_taskloop_grainsize.c | 113 + .../test/tasking/omp_taskloop_num_tasks.c | 77 + .../test/tasking/omp_taskloop_taskwait.c | 30 + bolt/runtime/test/tasking/omp_taskwait.c | 78 + bolt/runtime/test/tasking/omp_taskyield.c | 61 + .../runtime/test/tasking/omp_taskyield_tied.c | 58 + .../test/threadprivate/omp_threadprivate.c | 103 + .../threadprivate/omp_threadprivate_for.c | 49 + .../test/worksharing/for/bug_set_schedule_0.c | 41 + .../test/worksharing/for/kmp_doacross_check.c | 62 + .../worksharing/for/kmp_sch_simd_guided.c | 415 + .../for/kmp_sch_simd_runtime_api.c | 222 + .../for/kmp_sch_simd_runtime_guided.c | 197 + .../for/kmp_sch_simd_runtime_static.c | 202 + .../worksharing/for/kmp_set_dispatch_buf.c | 92 + .../test/worksharing/for/omp_doacross.c | 62 + .../test/worksharing/for/omp_for_bigbounds.c | 72 + .../test/worksharing/for/omp_for_collapse.c | 52 + .../worksharing/for/omp_for_collapse_mini.c | 51 + .../worksharing/for/omp_for_firstprivate.c | 56 + .../omp_for_firstprivate_nothreadprivate.c | 52 + .../worksharing/for/omp_for_lastprivate.c | 53 + .../for/omp_for_lastprivate_nothreadprivate.c | 49 + .../test/worksharing/for/omp_for_nowait.c | 78 + 
.../test/worksharing/for/omp_for_ordered.c | 60 + .../test/worksharing/for/omp_for_private.c | 64 + .../for/omp_for_private_nothreadprivate.c | 60 + .../test/worksharing/for/omp_for_reduction.c | 339 + .../worksharing/for/omp_for_schedule_auto.c | 70 + .../omp_for_schedule_auto_nothreadprivate.c | 66 + .../for/omp_for_schedule_dynamic.c | 89 + .../worksharing/for/omp_for_schedule_guided.c | 217 + .../for/omp_for_schedule_runtime.c | 83 + .../worksharing/for/omp_for_schedule_static.c | 154 + .../for/omp_for_schedule_static_3.c | 202 + .../test/worksharing/for/omp_monotonic_env.c | 86 + .../for/omp_monotonic_schedule_set_get.c | 134 + .../for/omp_nonmonotonic_dynamic1.c | 40 + .../worksharing/for/omp_nonmonotonic_nowait.c | 34 + .../for/omp_parallel_for_firstprivate.c | 35 + .../worksharing/for/omp_parallel_for_if.c | 42 + .../for/omp_parallel_for_lastprivate.c | 37 + .../for/omp_parallel_for_ordered.c | 65 + ...omp_parallel_for_ordered_nothreadprivate.c | 55 + .../for/omp_parallel_for_private.c | 50 + .../for/omp_parallel_for_reduction.c | 266 + .../omp_parallel_sections_firstprivate.c | 54 + .../omp_parallel_sections_lastprivate.c | 71 + .../sections/omp_parallel_sections_private.c | 64 + .../omp_parallel_sections_reduction.c | 508 + .../sections/omp_section_firstprivate.c | 55 + .../sections/omp_section_lastprivate.c | 76 + .../sections/omp_section_private.c | 66 + .../sections/omp_sections_nowait.c | 105 + .../sections/omp_sections_reduction.c | 543 ++ .../test/worksharing/single/omp_single.c | 44 + .../single/omp_single_copyprivate.c | 61 + .../omp_single_copyprivate_nothreadprivate.c | 58 + .../worksharing/single/omp_single_nowait.c | 74 + .../worksharing/single/omp_single_private.c | 58 + .../omp_single_private_nothreadprivate.c | 51 + bolt/runtime/tools/check-depends.pl | 505 + bolt/runtime/tools/check-execstack.pl | 145 + bolt/runtime/tools/check-instruction-set.pl | 320 + bolt/runtime/tools/generate-def.pl | 323 + bolt/runtime/tools/lib/Build.pm | 263 + 
bolt/runtime/tools/lib/LibOMP.pm | 84 + bolt/runtime/tools/lib/Platform.pm | 488 + bolt/runtime/tools/lib/Uname.pm | 640 ++ bolt/runtime/tools/lib/tools.pm | 1980 ++++ bolt/runtime/tools/message-converter.pl | 774 ++ bolt/runtime/tools/summarizeStats.py | 323 + bolt/tools/CMakeLists.txt | 9 + bolt/tools/archer/CMakeLists.txt | 37 + bolt/tools/archer/README.md | 207 + bolt/tools/archer/ompt-tsan.cpp | 951 ++ bolt/tools/archer/tests/CMakeLists.txt | 37 + bolt/tools/archer/tests/barrier/barrier.c | 42 + bolt/tools/archer/tests/critical/critical.c | 36 + .../tools/archer/tests/critical/lock-nested.c | 44 + bolt/tools/archer/tests/critical/lock.c | 42 + bolt/tools/archer/tests/deflake.bash | 17 + bolt/tools/archer/tests/lit.cfg | 129 + bolt/tools/archer/tests/lit.site.cfg.in | 21 + bolt/tools/archer/tests/ompt/ompt-signal.h | 42 + .../tests/parallel/parallel-firstprivate.c | 33 + .../archer/tests/parallel/parallel-simple.c | 39 + .../archer/tests/parallel/parallel-simple2.c | 44 + .../archer/tests/races/critical-unrelated.c | 41 + .../tests/races/lock-nested-unrelated.c | 47 + .../tools/archer/tests/races/lock-unrelated.c | 47 + .../archer/tests/races/parallel-simple.c | 36 + .../archer/tests/races/task-dependency.c | 60 + .../tests/races/task-taskgroup-unrelated.c | 61 + .../archer/tests/races/task-taskwait-nested.c | 58 + bolt/tools/archer/tests/races/task-two.c | 44 + .../reduction/parallel-reduction-nowait.c | 46 + .../tests/reduction/parallel-reduction.c | 35 + bolt/tools/archer/tests/task/task-barrier.c | 52 + bolt/tools/archer/tests/task/task-create.c | 46 + .../tools/archer/tests/task/task-dependency.c | 54 + .../archer/tests/task/task-taskgroup-nested.c | 53 + bolt/tools/archer/tests/task/task-taskgroup.c | 50 + .../archer/tests/task/task-taskwait-nested.c | 53 + bolt/tools/archer/tests/task/task-taskwait.c | 50 + bolt/tools/archer/tests/worksharing/ordered.c | 39 + bolt/tools/multiplex/CMakeLists.txt | 10 + bolt/tools/multiplex/README.md | 60 + 
bolt/tools/multiplex/ompt-multiplex.h | 1094 +++ bolt/tools/multiplex/tests/CMakeLists.txt | 21 + .../custom_data_storage/custom_data_storage.c | 317 + .../tests/custom_data_storage/first-tool.h | 293 + .../tests/custom_data_storage/second-tool.h | 5 + bolt/tools/multiplex/tests/lit.cfg | 93 + bolt/tools/multiplex/tests/lit.site.cfg.in | 16 + bolt/tools/multiplex/tests/ompt-signal.h | 23 + bolt/tools/multiplex/tests/print/first-tool.h | 5 + bolt/tools/multiplex/tests/print/print.c | 308 + .../tools/multiplex/tests/print/second-tool.h | 5 + bolt/www/README.txt | 117 + bolt/www/Reference.pdf | Bin 0 -> 507616 bytes bolt/www/content.css | 27 + bolt/www/index.html | 227 + bolt/www/menu.css | 39 + 886 files changed, 214440 insertions(+) create mode 100644 bolt/.github/workflows/CI.yml create mode 100644 bolt/.gitignore create mode 100644 bolt/.gitmodules create mode 100644 bolt/CHANGES.txt create mode 100644 bolt/CMakeLists.txt create mode 100644 bolt/CREDITS.txt create mode 100644 bolt/LICENSE.txt create mode 100644 bolt/README.md create mode 100644 bolt/README.rst create mode 100644 bolt/cmake/DetectTestCompiler/CMakeLists.txt create mode 100644 bolt/cmake/HandleOpenMPOptions.cmake create mode 100644 bolt/cmake/OpenMPTesting.cmake create mode 100644 bolt/cmake/config-ix.cmake create mode 100644 bolt/docs/ReleaseNotes.rst create mode 100644 bolt/examples/argobots/.gitignore create mode 100644 bolt/examples/argobots/Makefile create mode 100644 bolt/examples/argobots/Makefile.omp create mode 100644 bolt/examples/argobots/README create mode 100644 bolt/examples/argobots/nested_parallel_for_abt_task.c create mode 100644 bolt/examples/argobots/nested_parallel_for_abt_thread.c create mode 100644 bolt/examples/argobots/nested_parallel_for_block_abt_task.c create mode 100644 bolt/examples/argobots/nested_parallel_for_block_abt_thread.c create mode 100644 bolt/examples/argobots/nested_parallel_for_block_omp.c create mode 100644 
bolt/examples/argobots/nested_parallel_for_irregular_abt_task.c create mode 100644 bolt/examples/argobots/nested_parallel_for_irregular_abt_thread.c create mode 100644 bolt/examples/argobots/nested_parallel_for_irregular_omp.c create mode 100644 bolt/examples/argobots/nested_parallel_for_omp.c create mode 100644 bolt/examples/argobots/parallel_for_abt_task.c create mode 100644 bolt/examples/argobots/parallel_for_abt_thread.c create mode 100644 bolt/examples/argobots/parallel_for_omp.c create mode 100644 bolt/examples/argobots/task_multiple_producer_abt_task.c create mode 100644 bolt/examples/argobots/task_multiple_producer_abt_thread.c create mode 100644 bolt/examples/argobots/task_multiple_producer_omp.c create mode 100644 bolt/examples/argobots/task_nested_abt_task.c create mode 100644 bolt/examples/argobots/task_nested_lvl2_abt_task.c create mode 100644 bolt/examples/argobots/task_nested_lvl2_omp.c create mode 100644 bolt/examples/argobots/task_nested_omp.c create mode 100644 bolt/examples/argobots/task_single_producer_abt_task.c create mode 100644 bolt/examples/argobots/task_single_producer_abt_thread.c create mode 100644 bolt/examples/argobots/task_single_producer_omp.c create mode 100644 bolt/examples/argobots/taskwait_omp.c create mode 100644 bolt/examples/argobots/taskyield_omp.c create mode 100644 bolt/examples/sample_nested.c create mode 100644 bolt/examples/sample_task_multiple_producer.c create mode 100644 bolt/examples/sample_task_single_producer.c create mode 100644 bolt/external/CMakeLists.txt create mode 100644 bolt/external/argobots/.github/workflows/CI.yml create mode 100644 bolt/external/argobots/.gitignore create mode 100644 bolt/external/argobots/CHANGES create mode 100644 bolt/external/argobots/COPYRIGHT create mode 100644 bolt/external/argobots/Doxyfile.in create mode 100644 bolt/external/argobots/Makefile.am create mode 100644 bolt/external/argobots/README.envvar create mode 100644 bolt/external/argobots/README.md create mode 100755 
bolt/external/argobots/autogen.sh create mode 100644 bolt/external/argobots/configure.ac create mode 100644 bolt/external/argobots/doc/coding-standards.txt create mode 100644 bolt/external/argobots/doc/img/es_states.png create mode 100644 bolt/external/argobots/doc/img/tasklet_states.png create mode 100644 bolt/external/argobots/doc/img/ult_states.png create mode 100644 bolt/external/argobots/examples/.gitignore create mode 100644 bolt/external/argobots/examples/Makefile.am create mode 100644 bolt/external/argobots/examples/Makefile.mk create mode 100644 bolt/external/argobots/examples/fibonacci/.gitignore create mode 100644 bolt/external/argobots/examples/fibonacci/Makefile.am create mode 100644 bolt/external/argobots/examples/fibonacci/fibonacci.c create mode 100644 bolt/external/argobots/examples/hello_world/.gitignore create mode 100644 bolt/external/argobots/examples/hello_world/Makefile.am create mode 100644 bolt/external/argobots/examples/hello_world/hello_world.c create mode 100644 bolt/external/argobots/examples/hello_world/hello_world_ws.c create mode 100644 bolt/external/argobots/examples/profiling/.gitignore create mode 100644 bolt/external/argobots/examples/profiling/Makefile.am create mode 100644 bolt/external/argobots/examples/profiling/abtx_prof.h create mode 100644 bolt/external/argobots/examples/profiling/async_engine.c create mode 100644 bolt/external/argobots/examples/profiling/daxpy.c create mode 100644 bolt/external/argobots/examples/scheduling/.gitignore create mode 100644 bolt/external/argobots/examples/scheduling/Makefile.am create mode 100644 bolt/external/argobots/examples/scheduling/sched_and_pool_user.c create mode 100644 bolt/external/argobots/examples/scheduling/sched_predef.c create mode 100644 bolt/external/argobots/examples/scheduling/sched_shared_pool.c create mode 100644 bolt/external/argobots/examples/scheduling/sched_stack.c create mode 100644 bolt/external/argobots/examples/scheduling/sched_user.c create mode 100644 
bolt/external/argobots/examples/stencil/.gitignore create mode 100644 bolt/external/argobots/examples/stencil/Makefile.am create mode 100644 bolt/external/argobots/examples/stencil/stencil_barrier.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_depend_future.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_depend_mutex_cond.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_depend_yield.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_divconq.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_divconq_hrws.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_divconq_rws.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_revive.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_task.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_task_revive.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_forkjoin_ws.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_helper.h create mode 100644 bolt/external/argobots/examples/stencil/stencil_naive.c create mode 100644 bolt/external/argobots/examples/stencil/stencil_seq.c create mode 100644 bolt/external/argobots/m4/aclocal.m4 create mode 100644 bolt/external/argobots/m4/aclocal_cc.m4 create mode 100644 bolt/external/argobots/m4/aclocal_check_visibility.m4 create mode 100644 bolt/external/argobots/m4/aclocal_runlog.m4 create mode 100644 bolt/external/argobots/m4/aclocal_util.m4 create mode 100644 bolt/external/argobots/m4/ax_gcc_builtin.m4 create mode 100644 bolt/external/argobots/m4/ax_gcc_func_attribute.m4 create mode 100755 bolt/external/argobots/m4/config.rpath create mode 100644 bolt/external/argobots/maint/Version.base.m4 create mode 100644 bolt/external/argobots/maint/argobots.pc.in create 
mode 100755 bolt/external/argobots/maint/code-cleanup.sh create mode 100755 bolt/external/argobots/maint/hook/pre-commit create mode 100755 bolt/external/argobots/maint/release.pl create mode 100644 bolt/external/argobots/maint/template.c create mode 100644 bolt/external/argobots/maint/version.m4 create mode 100644 bolt/external/argobots/src/Makefile.am create mode 100644 bolt/external/argobots/src/arch/Makefile.mk create mode 100644 bolt/external/argobots/src/arch/abtd_affinity.c create mode 100644 bolt/external/argobots/src/arch/abtd_affinity_parser.c create mode 100644 bolt/external/argobots/src/arch/abtd_env.c create mode 100644 bolt/external/argobots/src/arch/abtd_stream.c create mode 100644 bolt/external/argobots/src/arch/abtd_time.c create mode 100644 bolt/external/argobots/src/arch/abtd_ythread.c create mode 100644 bolt/external/argobots/src/arch/fcontext/LICENSE_1_0.txt create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_arm64_aapcs_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_arm64_aapcs_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_i386_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_i386_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_ppc32_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_ppc32_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_ppc64_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_ppc64_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_x86_64_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/jump_x86_64_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_arm64_aapcs_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_arm64_aapcs_macho_gas.S create mode 100644 
bolt/external/argobots/src/arch/fcontext/make_i386_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_i386_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_ppc32_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_ppc32_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_ppc64_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_ppc64_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_x86_64_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/make_x86_64_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_arm64_aapcs_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_arm64_aapcs_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_i386_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_i386_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_ppc32_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_ppc32_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_ppc64_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_ppc64_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_x86_64_sysv_elf_gas.S create mode 100644 bolt/external/argobots/src/arch/fcontext/take_x86_64_sysv_macho_gas.S create mode 100644 bolt/external/argobots/src/barrier.c create mode 100644 bolt/external/argobots/src/cond.c create mode 100644 bolt/external/argobots/src/error.c create mode 100644 bolt/external/argobots/src/eventual.c create mode 100644 bolt/external/argobots/src/futures.c create mode 100644 bolt/external/argobots/src/global.c create mode 100644 bolt/external/argobots/src/include/Makefile.mk create mode 100644 
bolt/external/argobots/src/include/abt.h.in create mode 100644 bolt/external/argobots/src/include/abtd.h create mode 100644 bolt/external/argobots/src/include/abtd_atomic.h create mode 100644 bolt/external/argobots/src/include/abtd_context.h create mode 100644 bolt/external/argobots/src/include/abtd_fcontext.h create mode 100644 bolt/external/argobots/src/include/abtd_stream.h create mode 100644 bolt/external/argobots/src/include/abtd_ucontext.h create mode 100644 bolt/external/argobots/src/include/abtd_ythread.h create mode 100644 bolt/external/argobots/src/include/abti.h create mode 100644 bolt/external/argobots/src/include/abti_barrier.h create mode 100644 bolt/external/argobots/src/include/abti_cond.h create mode 100644 bolt/external/argobots/src/include/abti_config.h create mode 100644 bolt/external/argobots/src/include/abti_error.h create mode 100644 bolt/external/argobots/src/include/abti_eventual.h create mode 100644 bolt/external/argobots/src/include/abti_future.h create mode 100644 bolt/external/argobots/src/include/abti_key.h create mode 100644 bolt/external/argobots/src/include/abti_local.h create mode 100644 bolt/external/argobots/src/include/abti_log.h create mode 100644 bolt/external/argobots/src/include/abti_mem.h create mode 100644 bolt/external/argobots/src/include/abti_mem_pool.h create mode 100644 bolt/external/argobots/src/include/abti_mutex.h create mode 100644 bolt/external/argobots/src/include/abti_mutex_attr.h create mode 100644 bolt/external/argobots/src/include/abti_pool.h create mode 100644 bolt/external/argobots/src/include/abti_rwlock.h create mode 100644 bolt/external/argobots/src/include/abti_sched.h create mode 100644 bolt/external/argobots/src/include/abti_self.h create mode 100644 bolt/external/argobots/src/include/abti_spinlock.h create mode 100644 bolt/external/argobots/src/include/abti_stream.h create mode 100644 bolt/external/argobots/src/include/abti_stream_barrier.h create mode 100644 
bolt/external/argobots/src/include/abti_sync_lifo.h create mode 100644 bolt/external/argobots/src/include/abti_thread.h create mode 100644 bolt/external/argobots/src/include/abti_thread_attr.h create mode 100644 bolt/external/argobots/src/include/abti_timer.h create mode 100644 bolt/external/argobots/src/include/abti_tool.h create mode 100644 bolt/external/argobots/src/include/abti_valgrind.h create mode 100644 bolt/external/argobots/src/include/abti_ythread.h create mode 100644 bolt/external/argobots/src/include/abti_ythread_htable.h create mode 100644 bolt/external/argobots/src/include/abtu.h create mode 100644 bolt/external/argobots/src/include/asm/Makefile.mk create mode 100644 bolt/external/argobots/src/include/asm/abtd_asm_int128_cas.h create mode 100644 bolt/external/argobots/src/info.c create mode 100644 bolt/external/argobots/src/key.c create mode 100644 bolt/external/argobots/src/local.c create mode 100644 bolt/external/argobots/src/log.c create mode 100644 bolt/external/argobots/src/mem/Makefile.mk create mode 100644 bolt/external/argobots/src/mem/malloc.c create mode 100644 bolt/external/argobots/src/mem/mem_pool.c create mode 100644 bolt/external/argobots/src/mem/valgrind.c create mode 100644 bolt/external/argobots/src/mutex.c create mode 100644 bolt/external/argobots/src/mutex_attr.c create mode 100644 bolt/external/argobots/src/pool/Makefile.mk create mode 100644 bolt/external/argobots/src/pool/fifo.c create mode 100644 bolt/external/argobots/src/pool/fifo_wait.c create mode 100644 bolt/external/argobots/src/pool/pool.c create mode 100644 bolt/external/argobots/src/rwlock.c create mode 100644 bolt/external/argobots/src/sched/Makefile.mk create mode 100644 bolt/external/argobots/src/sched/basic.c create mode 100644 bolt/external/argobots/src/sched/basic_wait.c create mode 100644 bolt/external/argobots/src/sched/config.c create mode 100644 bolt/external/argobots/src/sched/prio.c create mode 100644 bolt/external/argobots/src/sched/randws.c create mode 
100644 bolt/external/argobots/src/sched/sched.c create mode 100644 bolt/external/argobots/src/self.c create mode 100644 bolt/external/argobots/src/stream.c create mode 100644 bolt/external/argobots/src/stream_barrier.c create mode 100644 bolt/external/argobots/src/task.c create mode 100644 bolt/external/argobots/src/thread.c create mode 100644 bolt/external/argobots/src/thread_attr.c create mode 100644 bolt/external/argobots/src/timer.c create mode 100644 bolt/external/argobots/src/tool.c create mode 100644 bolt/external/argobots/src/unit.c create mode 100644 bolt/external/argobots/src/util/Makefile.mk create mode 100644 bolt/external/argobots/src/util/largepage.c create mode 100644 bolt/external/argobots/src/ythread.c create mode 100644 bolt/external/argobots/src/ythread_htable.c create mode 100644 bolt/external/argobots/test/.gitignore create mode 100644 bolt/external/argobots/test/Makefile.am create mode 100644 bolt/external/argobots/test/Makefile.mk create mode 100644 bolt/external/argobots/test/basic/Makefile.am create mode 100644 bolt/external/argobots/test/basic/barrier.c create mode 100644 bolt/external/argobots/test/basic/cond_join.c create mode 100644 bolt/external/argobots/test/basic/cond_signal_in_main.c create mode 100644 bolt/external/argobots/test/basic/cond_test.c create mode 100644 bolt/external/argobots/test/basic/cond_timedwait.c create mode 100644 bolt/external/argobots/test/basic/eventual_create.c create mode 100644 bolt/external/argobots/test/basic/eventual_test.c create mode 100644 bolt/external/argobots/test/basic/ext_thread.c create mode 100644 bolt/external/argobots/test/basic/ext_thread2.c create mode 100644 bolt/external/argobots/test/basic/future_create.c create mode 100644 bolt/external/argobots/test/basic/info_print.c create mode 100644 bolt/external/argobots/test/basic/info_stackdump.c create mode 100644 bolt/external/argobots/test/basic/info_stackdump2.c create mode 100644 bolt/external/argobots/test/basic/init_finalize.c create 
mode 100644 bolt/external/argobots/test/basic/main_sched.c create mode 100644 bolt/external/argobots/test/basic/mutex.c create mode 100644 bolt/external/argobots/test/basic/mutex_prio.c create mode 100644 bolt/external/argobots/test/basic/mutex_recursive.c create mode 100644 bolt/external/argobots/test/basic/mutex_spinlock.c create mode 100644 bolt/external/argobots/test/basic/mutex_unlock_se.c create mode 100644 bolt/external/argobots/test/basic/rwlock_reader_incl.c create mode 100644 bolt/external/argobots/test/basic/rwlock_reader_writer_excl.c create mode 100644 bolt/external/argobots/test/basic/rwlock_writer_excl.c create mode 100644 bolt/external/argobots/test/basic/sched_basic.c create mode 100644 bolt/external/argobots/test/basic/sched_basic_wait.c create mode 100644 bolt/external/argobots/test/basic/sched_config.c create mode 100644 bolt/external/argobots/test/basic/sched_on_thread.c create mode 100644 bolt/external/argobots/test/basic/sched_prio.c create mode 100644 bolt/external/argobots/test/basic/sched_randws.c create mode 100644 bolt/external/argobots/test/basic/sched_set_main.c create mode 100644 bolt/external/argobots/test/basic/sched_stack.c create mode 100644 bolt/external/argobots/test/basic/sched_user_ws.c create mode 100644 bolt/external/argobots/test/basic/self_type.c create mode 100644 bolt/external/argobots/test/basic/task_create.c create mode 100644 bolt/external/argobots/test/basic/task_create_on_xstream.c create mode 100644 bolt/external/argobots/test/basic/task_data.c create mode 100644 bolt/external/argobots/test/basic/task_data2.c create mode 100644 bolt/external/argobots/test/basic/task_revive.c create mode 100644 bolt/external/argobots/test/basic/thread_attr.c create mode 100644 bolt/external/argobots/test/basic/thread_create.c create mode 100644 bolt/external/argobots/test/basic/thread_create2.c create mode 100644 bolt/external/argobots/test/basic/thread_create_on_xstream.c create mode 100644 
bolt/external/argobots/test/basic/thread_data.c create mode 100644 bolt/external/argobots/test/basic/thread_data2.c create mode 100644 bolt/external/argobots/test/basic/thread_get_last_xstream.c create mode 100644 bolt/external/argobots/test/basic/thread_id.c create mode 100644 bolt/external/argobots/test/basic/thread_migrate.c create mode 100644 bolt/external/argobots/test/basic/thread_revive.c create mode 100644 bolt/external/argobots/test/basic/thread_self_suspend_resume.c create mode 100644 bolt/external/argobots/test/basic/thread_task.c create mode 100644 bolt/external/argobots/test/basic/thread_task_arg.c create mode 100644 bolt/external/argobots/test/basic/thread_task_num.c create mode 100644 bolt/external/argobots/test/basic/thread_yield.c create mode 100644 bolt/external/argobots/test/basic/thread_yield_to.c create mode 100644 bolt/external/argobots/test/basic/timer.c create mode 100644 bolt/external/argobots/test/basic/xstream_affinity.c create mode 100644 bolt/external/argobots/test/basic/xstream_barrier.c create mode 100644 bolt/external/argobots/test/basic/xstream_create.c create mode 100644 bolt/external/argobots/test/basic/xstream_rank.c create mode 100644 bolt/external/argobots/test/basic/xstream_revive.c create mode 100644 bolt/external/argobots/test/benchmark/Makefile.am create mode 100644 bolt/external/argobots/test/benchmark/bench_util.h create mode 100644 bolt/external/argobots/test/benchmark/init_finalize.c create mode 100644 bolt/external/argobots/test/benchmark/sync_ops.c create mode 100644 bolt/external/argobots/test/benchmark/task_fork_join.c create mode 100644 bolt/external/argobots/test/benchmark/task_ops.c create mode 100644 bolt/external/argobots/test/benchmark/task_ops_all.c create mode 100644 bolt/external/argobots/test/benchmark/thread_fork_join.c create mode 100644 bolt/external/argobots/test/benchmark/thread_many_ops.c create mode 100644 bolt/external/argobots/test/benchmark/thread_ops.c create mode 100644 
bolt/external/argobots/test/benchmark/thread_ops_all.c create mode 100644 bolt/external/argobots/test/benchmark/xstream_ops.c create mode 100755 bolt/external/argobots/test/scripts/create_junit.pl create mode 100644 bolt/external/argobots/test/util/Makefile.am create mode 100644 bolt/external/argobots/test/util/abttest.c create mode 100644 bolt/external/argobots/test/util/abttest.h create mode 100644 bolt/libomptarget/CMakeLists.txt create mode 100644 bolt/libomptarget/README.txt create mode 100644 bolt/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake create mode 100644 bolt/libomptarget/cmake/Modules/LibomptargetNVPTXBitcodeLibrary.cmake create mode 100644 bolt/libomptarget/cmake/Modules/LibomptargetUtils.cmake create mode 100644 bolt/libomptarget/deviceRTLs/CMakeLists.txt create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/src/target_impl.h create mode 100644 bolt/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip create mode 100644 bolt/libomptarget/deviceRTLs/common/debug.h create mode 100644 bolt/libomptarget/deviceRTLs/common/device_environment.h create mode 100644 bolt/libomptarget/deviceRTLs/common/omptarget.h create mode 100644 bolt/libomptarget/deviceRTLs/common/omptargeti.h create mode 100644 bolt/libomptarget/deviceRTLs/common/src/cancel.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/critical.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/data_sharing.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/libcall.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/loop.cu create mode 100644 
bolt/libomptarget/deviceRTLs/common/src/omp_data.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/omptarget.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/parallel.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/reduction.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/support.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/sync.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/src/task.cu create mode 100644 bolt/libomptarget/deviceRTLs/common/state-queue.h create mode 100644 bolt/libomptarget/deviceRTLs/common/state-queuei.h create mode 100644 bolt/libomptarget/deviceRTLs/common/support.h create mode 100644 bolt/libomptarget/deviceRTLs/common/target_atomic.h create mode 100644 bolt/libomptarget/deviceRTLs/interface.h create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/CMakeLists.txt create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/docs/ReductionDesign.txt create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/src/target_impl.cu create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/src/target_impl.h create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/api/get_max_threads.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/api/ignored.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/lit.cfg create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/barrier.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c create mode 100644 
bolt/libomptarget/deviceRTLs/nvptx/test/parallel/level.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c create mode 100644 bolt/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c create mode 100644 bolt/libomptarget/include/omptarget.h create mode 100644 bolt/libomptarget/include/omptargetplugin.h create mode 100644 bolt/libomptarget/plugins/CMakeLists.txt create mode 100644 bolt/libomptarget/plugins/aarch64/CMakeLists.txt create mode 100644 bolt/libomptarget/plugins/common/elf_common.c create mode 100644 bolt/libomptarget/plugins/cuda/CMakeLists.txt create mode 100644 bolt/libomptarget/plugins/cuda/src/rtl.cpp create mode 100644 bolt/libomptarget/plugins/exports create mode 100644 bolt/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp create mode 100644 bolt/libomptarget/plugins/ppc64/CMakeLists.txt create mode 100644 bolt/libomptarget/plugins/ppc64le/CMakeLists.txt create mode 100644 bolt/libomptarget/plugins/ve/CMakeLists.txt create mode 100644 bolt/libomptarget/plugins/ve/src/rtl.cpp create mode 100644 bolt/libomptarget/plugins/x86_64/CMakeLists.txt create mode 100644 bolt/libomptarget/src/CMakeLists.txt create mode 100644 bolt/libomptarget/src/api.cpp create mode 100644 bolt/libomptarget/src/device.cpp create mode 100644 bolt/libomptarget/src/device.h create mode 100644 bolt/libomptarget/src/exports create mode 100644 bolt/libomptarget/src/interface.cpp create mode 100644 bolt/libomptarget/src/omptarget.cpp create mode 100644 bolt/libomptarget/src/private.h create mode 100644 bolt/libomptarget/src/rtl.cpp create mode 100644 bolt/libomptarget/src/rtl.h create mode 100644 bolt/libomptarget/test/CMakeLists.txt create mode 100644 
bolt/libomptarget/test/api/omp_get_num_devices.c create mode 100644 bolt/libomptarget/test/api/omp_get_num_devices_with_empty_target.c create mode 100644 bolt/libomptarget/test/env/omp_target_debug.c create mode 100644 bolt/libomptarget/test/lit.cfg create mode 100644 bolt/libomptarget/test/lit.site.cfg.in create mode 100644 bolt/libomptarget/test/mapping/alloc_fail.c create mode 100644 bolt/libomptarget/test/mapping/declare_mapper_api.cpp create mode 100644 bolt/libomptarget/test/mapping/delete_inf_refcount.c create mode 100644 bolt/libomptarget/test/mapping/pr38704.c create mode 100644 bolt/libomptarget/test/offloading/d2d_memcpy.c create mode 100644 bolt/libomptarget/test/offloading/dynamic_module.c create mode 100644 bolt/libomptarget/test/offloading/dynamic_module_load.c create mode 100644 bolt/libomptarget/test/offloading/looptripcnt.c create mode 100644 bolt/libomptarget/test/offloading/offloading_success.c create mode 100644 bolt/libomptarget/test/offloading/offloading_success.cpp create mode 100644 bolt/libomptarget/test/offloading/parallel_offloading_map.c create mode 100644 bolt/libomptarget/test/offloading/requires.c create mode 100644 bolt/libomptarget/test/offloading/target_depend_nowait.cpp create mode 100644 bolt/libomptarget/test/unified_shared_memory/api.c create mode 100644 bolt/libomptarget/test/unified_shared_memory/close_enter_exit.c create mode 100644 bolt/libomptarget/test/unified_shared_memory/close_manual.c create mode 100644 bolt/libomptarget/test/unified_shared_memory/close_modifier.c create mode 100644 bolt/libomptarget/test/unified_shared_memory/shared_update.c create mode 100755 bolt/maint/bolt-release.pl create mode 100755 bolt/maint/update-llvmomp.sh create mode 100644 bolt/runtime/.clang-format create mode 100644 bolt/runtime/CMakeLists.txt create mode 100644 bolt/runtime/README.txt create mode 100644 bolt/runtime/cmake/LibboltSymlinkArgobots.cmake create mode 100644 bolt/runtime/cmake/LibompCheckFortranFlag.cmake create mode 
100644 bolt/runtime/cmake/LibompCheckLinkerFlag.cmake create mode 100644 bolt/runtime/cmake/LibompDefinitions.cmake create mode 100644 bolt/runtime/cmake/LibompExports.cmake create mode 100644 bolt/runtime/cmake/LibompGetArchitecture.cmake create mode 100644 bolt/runtime/cmake/LibompHandleFlags.cmake create mode 100644 bolt/runtime/cmake/LibompMicroTests.cmake create mode 100644 bolt/runtime/cmake/LibompUtils.cmake create mode 100644 bolt/runtime/cmake/config-ix.cmake create mode 100644 bolt/runtime/doc/Reference.pdf create mode 100644 bolt/runtime/doc/doxygen/config create mode 100644 bolt/runtime/doc/doxygen/header.tex create mode 100644 bolt/runtime/doc/doxygen/libomp_interface.h create mode 100644 bolt/runtime/src/CMakeLists.txt create mode 100644 bolt/runtime/src/dllexports create mode 100644 bolt/runtime/src/exports_so.txt create mode 100644 bolt/runtime/src/extractExternal.cpp create mode 100644 bolt/runtime/src/i18n/en_US.txt create mode 100644 bolt/runtime/src/include/omp-tools.h.var create mode 100644 bolt/runtime/src/include/omp.h.var create mode 100644 bolt/runtime/src/include/omp_lib.f.var create mode 100644 bolt/runtime/src/include/omp_lib.f90.var create mode 100644 bolt/runtime/src/include/omp_lib.h.var create mode 100644 bolt/runtime/src/kmp.h create mode 100644 bolt/runtime/src/kmp_abt.h create mode 100644 bolt/runtime/src/kmp_abt_affinity.cpp create mode 100644 bolt/runtime/src/kmp_affinity.cpp create mode 100644 bolt/runtime/src/kmp_affinity.h create mode 100644 bolt/runtime/src/kmp_alloc.cpp create mode 100644 bolt/runtime/src/kmp_atomic.cpp create mode 100644 bolt/runtime/src/kmp_atomic.h create mode 100644 bolt/runtime/src/kmp_barrier.cpp create mode 100644 bolt/runtime/src/kmp_cancel.cpp create mode 100644 bolt/runtime/src/kmp_config.h.cmake create mode 100644 bolt/runtime/src/kmp_csupport.cpp create mode 100644 bolt/runtime/src/kmp_debug.cpp create mode 100644 bolt/runtime/src/kmp_debug.h create mode 100644 bolt/runtime/src/kmp_debugger.cpp 
create mode 100644 bolt/runtime/src/kmp_debugger.h create mode 100644 bolt/runtime/src/kmp_dispatch.cpp create mode 100644 bolt/runtime/src/kmp_dispatch.h create mode 100644 bolt/runtime/src/kmp_dispatch_hier.h create mode 100644 bolt/runtime/src/kmp_environment.cpp create mode 100644 bolt/runtime/src/kmp_environment.h create mode 100644 bolt/runtime/src/kmp_error.cpp create mode 100644 bolt/runtime/src/kmp_error.h create mode 100644 bolt/runtime/src/kmp_ftn_cdecl.cpp create mode 100644 bolt/runtime/src/kmp_ftn_entry.h create mode 100644 bolt/runtime/src/kmp_ftn_extra.cpp create mode 100644 bolt/runtime/src/kmp_ftn_os.h create mode 100644 bolt/runtime/src/kmp_ftn_stdcall.cpp create mode 100644 bolt/runtime/src/kmp_global.cpp create mode 100644 bolt/runtime/src/kmp_gsupport.cpp create mode 100644 bolt/runtime/src/kmp_i18n.cpp create mode 100644 bolt/runtime/src/kmp_i18n.h create mode 100644 bolt/runtime/src/kmp_import.cpp create mode 100644 bolt/runtime/src/kmp_io.cpp create mode 100644 bolt/runtime/src/kmp_io.h create mode 100644 bolt/runtime/src/kmp_itt.cpp create mode 100644 bolt/runtime/src/kmp_itt.h create mode 100644 bolt/runtime/src/kmp_itt.inl create mode 100644 bolt/runtime/src/kmp_lock.cpp create mode 100644 bolt/runtime/src/kmp_lock.h create mode 100644 bolt/runtime/src/kmp_omp.h create mode 100644 bolt/runtime/src/kmp_os.h create mode 100644 bolt/runtime/src/kmp_platform.h create mode 100644 bolt/runtime/src/kmp_runtime.cpp create mode 100644 bolt/runtime/src/kmp_safe_c_api.h create mode 100644 bolt/runtime/src/kmp_sched.cpp create mode 100644 bolt/runtime/src/kmp_settings.cpp create mode 100644 bolt/runtime/src/kmp_settings.h create mode 100644 bolt/runtime/src/kmp_stats.cpp create mode 100644 bolt/runtime/src/kmp_stats.h create mode 100644 bolt/runtime/src/kmp_stats_timing.cpp create mode 100644 bolt/runtime/src/kmp_stats_timing.h create mode 100644 bolt/runtime/src/kmp_str.cpp create mode 100644 bolt/runtime/src/kmp_str.h create mode 100644 
bolt/runtime/src/kmp_stub.cpp create mode 100644 bolt/runtime/src/kmp_stub.h create mode 100644 bolt/runtime/src/kmp_taskdeps.cpp create mode 100644 bolt/runtime/src/kmp_taskdeps.h create mode 100644 bolt/runtime/src/kmp_tasking.cpp create mode 100644 bolt/runtime/src/kmp_threadprivate.cpp create mode 100644 bolt/runtime/src/kmp_utility.cpp create mode 100644 bolt/runtime/src/kmp_version.cpp create mode 100644 bolt/runtime/src/kmp_version.h create mode 100644 bolt/runtime/src/kmp_wait_release.cpp create mode 100644 bolt/runtime/src/kmp_wait_release.h create mode 100644 bolt/runtime/src/kmp_wrapper_getpid.h create mode 100644 bolt/runtime/src/kmp_wrapper_malloc.h create mode 100644 bolt/runtime/src/libomp.rc.var create mode 100644 bolt/runtime/src/ompt-event-specific.h create mode 100644 bolt/runtime/src/ompt-general.cpp create mode 100644 bolt/runtime/src/ompt-internal.h create mode 100644 bolt/runtime/src/ompt-specific.cpp create mode 100644 bolt/runtime/src/ompt-specific.h create mode 100644 bolt/runtime/src/test-touch.c create mode 100644 bolt/runtime/src/thirdparty/ittnotify/disable_warnings.h create mode 100644 bolt/runtime/src/thirdparty/ittnotify/ittnotify.h create mode 100644 bolt/runtime/src/thirdparty/ittnotify/ittnotify_config.h create mode 100644 bolt/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp create mode 100644 bolt/runtime/src/thirdparty/ittnotify/ittnotify_static.h create mode 100644 bolt/runtime/src/thirdparty/ittnotify/ittnotify_types.h create mode 100644 bolt/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h create mode 100644 bolt/runtime/src/tsan_annotations.cpp create mode 100644 bolt/runtime/src/tsan_annotations.h create mode 100644 bolt/runtime/src/z_Linux_asm.S create mode 100644 bolt/runtime/src/z_Linux_util.cpp create mode 100644 bolt/runtime/src/z_Windows_NT-586_asm.asm create mode 100644 bolt/runtime/src/z_Windows_NT-586_util.cpp create mode 100644 bolt/runtime/src/z_Windows_NT_util.cpp create mode 100644 
bolt/runtime/test/CMakeLists.txt create mode 100644 bolt/runtime/test/affinity/bug-nested.c create mode 100644 bolt/runtime/test/affinity/format/affinity_display.1.c create mode 100644 bolt/runtime/test/affinity/format/affinity_values.c create mode 100644 bolt/runtime/test/affinity/format/api.c create mode 100644 bolt/runtime/test/affinity/format/api2.c create mode 100644 bolt/runtime/test/affinity/format/check.py create mode 100644 bolt/runtime/test/affinity/format/fields_modifiers.c create mode 100644 bolt/runtime/test/affinity/format/fields_values.c create mode 100644 bolt/runtime/test/affinity/format/increase.c create mode 100644 bolt/runtime/test/affinity/format/lit.local.cfg create mode 100644 bolt/runtime/test/affinity/format/nested.c create mode 100644 bolt/runtime/test/affinity/format/nested2.c create mode 100644 bolt/runtime/test/affinity/format/nested_mixed.c create mode 100644 bolt/runtime/test/affinity/format/nested_serial.c create mode 100644 bolt/runtime/test/affinity/format/proc_bind.c create mode 100644 bolt/runtime/test/affinity/format/simple.c create mode 100644 bolt/runtime/test/affinity/format/simple_env.c create mode 100644 bolt/runtime/test/api/has_openmp.c create mode 100644 bolt/runtime/test/api/kmp_aligned_malloc.c create mode 100644 bolt/runtime/test/api/kmp_set_defaults_lock_bug.c create mode 100644 bolt/runtime/test/api/omp_alloc_def_fb.c create mode 100644 bolt/runtime/test/api/omp_alloc_hbw.c create mode 100644 bolt/runtime/test/api/omp_alloc_null_fb.c create mode 100644 bolt/runtime/test/api/omp_display_env0.c create mode 100644 bolt/runtime/test/api/omp_get_num_devices.c create mode 100644 bolt/runtime/test/api/omp_get_num_threads.c create mode 100644 bolt/runtime/test/api/omp_get_wtick.c create mode 100644 bolt/runtime/test/api/omp_get_wtime.c create mode 100644 bolt/runtime/test/api/omp_in_parallel.c create mode 100644 bolt/runtime/test/api/omp_pause_resource.c create mode 100644 bolt/runtime/test/atomic/omp_atomic.c create mode 
100644 bolt/runtime/test/barrier/omp_barrier.c create mode 100644 bolt/runtime/test/bolt/interop/init_then_openmp.c create mode 100644 bolt/runtime/test/bolt/interop/openmp_then_init.c create mode 100644 bolt/runtime/test/bolt/misc_bugs/untied_tasks.c create mode 100644 bolt/runtime/test/bolt/scheduling/bolt_scheduling_util.h create mode 100644 bolt/runtime/test/bolt/scheduling/for_nowait_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/task_tied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/task_tied_thread_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/task_unitied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/task_untied_thread_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_taskgroup_tied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_taskgroup_untied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_taskgroup_untied_yield_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_taskwait_tied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_taskwait_untied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_taskwait_untied_yield_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_tied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_untied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskdep_untied_yield_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskloop_nogroup_tied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskloop_nogroup_untied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskloop_tied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/taskloop_untied_scheduling.c create mode 100644 bolt/runtime/test/bolt/scheduling/thread_scheduling.c create mode 100644 
bolt/runtime/test/bolt/scheduling/thread_thread_scheduling.c create mode 100644 bolt/runtime/test/bolt/threadid/task_tied_thread_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/task_tied_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/task_unitied_thread_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/task_untied_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/task_untied_threadid2.c create mode 100644 bolt/runtime/test/bolt/threadid/taskdep_tied_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/taskdep_untied_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/taskdep_untied_threadid2.c create mode 100644 bolt/runtime/test/bolt/threadid/taskloop_tied_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/taskloop_untied_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/thread_thread_threadid.c create mode 100644 bolt/runtime/test/bolt/threadid/thread_threadid.c create mode 100644 bolt/runtime/test/critical/omp_critical.c create mode 100644 bolt/runtime/test/env/kmp_aff_disable_hwloc.c create mode 100644 bolt/runtime/test/env/kmp_set_dispatch_buf.c create mode 100644 bolt/runtime/test/env/omp_target_offload.c create mode 100644 bolt/runtime/test/env/omp_thread_limit.c create mode 100644 bolt/runtime/test/env/omp_wait_policy.c create mode 100644 bolt/runtime/test/flush/omp_flush.c create mode 100644 bolt/runtime/test/lit.cfg create mode 100644 bolt/runtime/test/lit.site.cfg.in create mode 100644 bolt/runtime/test/lock/omp_init_lock.c create mode 100644 bolt/runtime/test/lock/omp_lock.c create mode 100644 bolt/runtime/test/lock/omp_nest_lock.c create mode 100644 bolt/runtime/test/lock/omp_test_lock.c create mode 100644 bolt/runtime/test/lock/omp_test_nest_lock.c create mode 100644 bolt/runtime/test/master/omp_master.c create mode 100644 bolt/runtime/test/master/omp_master_3.c create mode 100644 bolt/runtime/test/misc_bugs/cancellation_for_sections.c create mode 100644 
bolt/runtime/test/misc_bugs/for-task-for-task.c create mode 100644 bolt/runtime/test/misc_bugs/for-task-for.c create mode 100644 bolt/runtime/test/misc_bugs/many-microtask-args.c create mode 100644 bolt/runtime/test/misc_bugs/omp_foreign_thread_team_reuse.c create mode 100644 bolt/runtime/test/misc_bugs/stack-propagate.c create mode 100644 bolt/runtime/test/misc_bugs/teams-no-par.c create mode 100644 bolt/runtime/test/misc_bugs/teams-reduction.c create mode 100644 bolt/runtime/test/omp_my_sleep.h create mode 100644 bolt/runtime/test/omp_testsuite.h create mode 100644 bolt/runtime/test/ompt/callback.h create mode 100644 bolt/runtime/test/ompt/cancel/cancel_parallel.c create mode 100644 bolt/runtime/test/ompt/cancel/cancel_taskgroup.c create mode 100644 bolt/runtime/test/ompt/cancel/cancel_worksharing.c create mode 100644 bolt/runtime/test/ompt/loadtool/tool_available/tool_available.c create mode 100644 bolt/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c create mode 100644 bolt/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c create mode 100644 bolt/runtime/test/ompt/misc/api_calls_from_other_thread.cpp create mode 100644 bolt/runtime/test/ompt/misc/api_calls_misc.c create mode 100644 bolt/runtime/test/ompt/misc/api_calls_places.c create mode 100644 bolt/runtime/test/ompt/misc/api_calls_without_ompt.c create mode 100644 bolt/runtime/test/ompt/misc/control_tool.c create mode 100644 bolt/runtime/test/ompt/misc/control_tool_no_ompt_support.c create mode 100644 bolt/runtime/test/ompt/misc/finalize_tool.c create mode 100644 bolt/runtime/test/ompt/misc/interoperability.cpp create mode 100644 bolt/runtime/test/ompt/misc/threads.c create mode 100644 bolt/runtime/test/ompt/misc/threads_nested.c create mode 100644 bolt/runtime/test/ompt/misc/unset_callback.c create mode 100644 bolt/runtime/test/ompt/ompt-signal.h create mode 100644 bolt/runtime/test/ompt/parallel/dynamic_enough_threads.c create mode 100644 
bolt/runtime/test/ompt/parallel/dynamic_not_enough_threads.c create mode 100644 bolt/runtime/test/ompt/parallel/max_active_levels_serialized.c create mode 100644 bolt/runtime/test/ompt/parallel/nested.c create mode 100644 bolt/runtime/test/ompt/parallel/nested_lwt.c create mode 100644 bolt/runtime/test/ompt/parallel/nested_serialized.c create mode 100644 bolt/runtime/test/ompt/parallel/nested_thread_num.c create mode 100644 bolt/runtime/test/ompt/parallel/nested_threadnum.c create mode 100644 bolt/runtime/test/ompt/parallel/no_thread_num_clause.c create mode 100644 bolt/runtime/test/ompt/parallel/normal.c create mode 100644 bolt/runtime/test/ompt/parallel/not_enough_threads.c create mode 100644 bolt/runtime/test/ompt/parallel/parallel_if0.c create mode 100644 bolt/runtime/test/ompt/parallel/serialized.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/explicit.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/for_loop.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/for_simd.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/implicit_task_data.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/parallel_region.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/sections.c create mode 100644 bolt/runtime/test/ompt/synchronization/barrier/single.c create mode 100644 bolt/runtime/test/ompt/synchronization/critical.c create mode 100644 bolt/runtime/test/ompt/synchronization/flush.c create mode 100644 bolt/runtime/test/ompt/synchronization/lock.c create mode 100644 bolt/runtime/test/ompt/synchronization/master.c create mode 100644 bolt/runtime/test/ompt/synchronization/nest_lock.c create mode 100644 bolt/runtime/test/ompt/synchronization/ordered.c create mode 100644 bolt/runtime/test/ompt/synchronization/ordered_dependences.c create mode 100644 bolt/runtime/test/ompt/synchronization/reduction/empty_reduce.c create mode 100644 
bolt/runtime/test/ompt/synchronization/reduction/tree_reduce.c create mode 100644 bolt/runtime/test/ompt/synchronization/taskgroup.c create mode 100644 bolt/runtime/test/ompt/synchronization/taskwait.c create mode 100644 bolt/runtime/test/ompt/synchronization/test_lock.c create mode 100644 bolt/runtime/test/ompt/synchronization/test_nest_lock.c create mode 100644 bolt/runtime/test/ompt/synchronization/test_nest_lock_parallel.c create mode 100644 bolt/runtime/test/ompt/tasks/dependences.c create mode 100644 bolt/runtime/test/ompt/tasks/dependences_mutexinoutset.c create mode 100644 bolt/runtime/test/ompt/tasks/explicit_task.c create mode 100644 bolt/runtime/test/ompt/tasks/serialized.c create mode 100644 bolt/runtime/test/ompt/tasks/task_early_fulfill.c create mode 100644 bolt/runtime/test/ompt/tasks/task_in_joinbarrier.c create mode 100644 bolt/runtime/test/ompt/tasks/task_late_fulfill.c create mode 100644 bolt/runtime/test/ompt/tasks/task_memory.c create mode 100644 bolt/runtime/test/ompt/tasks/task_types.c create mode 100644 bolt/runtime/test/ompt/tasks/task_types_serialized.c create mode 100644 bolt/runtime/test/ompt/tasks/taskloop.c create mode 100644 bolt/runtime/test/ompt/tasks/taskwait-depend.c create mode 100644 bolt/runtime/test/ompt/tasks/taskyield.c create mode 100644 bolt/runtime/test/ompt/tasks/untied_task.c create mode 100644 bolt/runtime/test/ompt/teams/parallel_team.c create mode 100644 bolt/runtime/test/ompt/teams/serial_teams.c create mode 100644 bolt/runtime/test/ompt/teams/serialized.c create mode 100644 bolt/runtime/test/ompt/teams/team.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/auto.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/auto_serialized.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/auto_split.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/base.h create mode 100644 bolt/runtime/test/ompt/worksharing/for/base_serialized.h create mode 100644 
bolt/runtime/test/ompt/worksharing/for/base_split.h create mode 100644 bolt/runtime/test/ompt/worksharing/for/dynamic.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/dynamic_serialized.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/dynamic_split.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/guided.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/guided_serialized.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/guided_split.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/runtime.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/runtime_serialized.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/runtime_split.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/static.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/static_serialized.c create mode 100644 bolt/runtime/test/ompt/worksharing/for/static_split.c create mode 100644 bolt/runtime/test/ompt/worksharing/sections.c create mode 100644 bolt/runtime/test/ompt/worksharing/single.c create mode 100644 bolt/runtime/test/parallel/omp_nested.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_copyin.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_default.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_firstprivate.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_if.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_num_threads.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_private.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_reduction.c create mode 100644 bolt/runtime/test/parallel/omp_parallel_shared.c create mode 100644 bolt/runtime/test/tasking/bug_36720.c create mode 100644 bolt/runtime/test/tasking/bug_nested_proxy_task.c create mode 100644 bolt/runtime/test/tasking/bug_proxy_task_dep_waiting.c create mode 100644 bolt/runtime/test/tasking/bug_serial_taskgroup.c create mode 100644 
bolt/runtime/test/tasking/kmp_detach_tasks_t1.c create mode 100644 bolt/runtime/test/tasking/kmp_detach_tasks_t2.c create mode 100644 bolt/runtime/test/tasking/kmp_detach_tasks_t3.c create mode 100644 bolt/runtime/test/tasking/kmp_task_modifier_simple_par_new.cpp create mode 100644 bolt/runtime/test/tasking/kmp_task_modifier_simple_par_old.cpp create mode 100644 bolt/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp create mode 100644 bolt/runtime/test/tasking/kmp_task_modifier_simple_ws_old.cpp create mode 100644 bolt/runtime/test/tasking/kmp_task_reduction_nest.cpp create mode 100644 bolt/runtime/test/tasking/kmp_taskloop.c create mode 100644 bolt/runtime/test/tasking/nested_parallel_tasking.c create mode 100644 bolt/runtime/test/tasking/nested_task_creation.c create mode 100644 bolt/runtime/test/tasking/omp50_task_depend_mtx.c create mode 100644 bolt/runtime/test/tasking/omp50_task_depend_mtx2.c create mode 100644 bolt/runtime/test/tasking/omp_detach_taskwait.c create mode 100644 bolt/runtime/test/tasking/omp_fill_taskqueue.c create mode 100644 bolt/runtime/test/tasking/omp_task.c create mode 100644 bolt/runtime/test/tasking/omp_task_depend.c create mode 100644 bolt/runtime/test/tasking/omp_task_depend_resize_hashmap.c create mode 100644 bolt/runtime/test/tasking/omp_task_final.c create mode 100644 bolt/runtime/test/tasking/omp_task_firstprivate.c create mode 100644 bolt/runtime/test/tasking/omp_task_if.c create mode 100644 bolt/runtime/test/tasking/omp_task_imp_firstprivate.c create mode 100644 bolt/runtime/test/tasking/omp_task_nest_tied.c create mode 100644 bolt/runtime/test/tasking/omp_task_nest_untied.c create mode 100644 bolt/runtime/test/tasking/omp_task_priority.c create mode 100644 bolt/runtime/test/tasking/omp_task_private.c create mode 100644 bolt/runtime/test/tasking/omp_task_red_taskloop.c create mode 100644 bolt/runtime/test/tasking/omp_task_shared.c create mode 100644 bolt/runtime/test/tasking/omp_taskloop_grainsize.c create mode 100644 
bolt/runtime/test/tasking/omp_taskloop_num_tasks.c create mode 100644 bolt/runtime/test/tasking/omp_taskloop_taskwait.c create mode 100644 bolt/runtime/test/tasking/omp_taskwait.c create mode 100644 bolt/runtime/test/tasking/omp_taskyield.c create mode 100644 bolt/runtime/test/tasking/omp_taskyield_tied.c create mode 100644 bolt/runtime/test/threadprivate/omp_threadprivate.c create mode 100644 bolt/runtime/test/threadprivate/omp_threadprivate_for.c create mode 100644 bolt/runtime/test/worksharing/for/bug_set_schedule_0.c create mode 100644 bolt/runtime/test/worksharing/for/kmp_doacross_check.c create mode 100644 bolt/runtime/test/worksharing/for/kmp_sch_simd_guided.c create mode 100644 bolt/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c create mode 100644 bolt/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c create mode 100644 bolt/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c create mode 100644 bolt/runtime/test/worksharing/for/kmp_set_dispatch_buf.c create mode 100644 bolt/runtime/test/worksharing/for/omp_doacross.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_bigbounds.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_collapse.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_collapse_mini.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_firstprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_firstprivate_nothreadprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_lastprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_lastprivate_nothreadprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_nowait.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_ordered.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_private.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_private_nothreadprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_reduction.c 
create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_auto.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_auto_nothreadprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_dynamic.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_guided.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_runtime.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_static.c create mode 100644 bolt/runtime/test/worksharing/for/omp_for_schedule_static_3.c create mode 100644 bolt/runtime/test/worksharing/for/omp_monotonic_env.c create mode 100644 bolt/runtime/test/worksharing/for/omp_monotonic_schedule_set_get.c create mode 100644 bolt/runtime/test/worksharing/for/omp_nonmonotonic_dynamic1.c create mode 100644 bolt/runtime/test/worksharing/for/omp_nonmonotonic_nowait.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_firstprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_if.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_lastprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_ordered.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_ordered_nothreadprivate.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_private.c create mode 100644 bolt/runtime/test/worksharing/for/omp_parallel_for_reduction.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_parallel_sections_firstprivate.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_parallel_sections_lastprivate.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_parallel_sections_private.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_parallel_sections_reduction.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_section_firstprivate.c create mode 100644 
bolt/runtime/test/worksharing/sections/omp_section_lastprivate.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_section_private.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_sections_nowait.c create mode 100644 bolt/runtime/test/worksharing/sections/omp_sections_reduction.c create mode 100644 bolt/runtime/test/worksharing/single/omp_single.c create mode 100644 bolt/runtime/test/worksharing/single/omp_single_copyprivate.c create mode 100644 bolt/runtime/test/worksharing/single/omp_single_copyprivate_nothreadprivate.c create mode 100644 bolt/runtime/test/worksharing/single/omp_single_nowait.c create mode 100644 bolt/runtime/test/worksharing/single/omp_single_private.c create mode 100644 bolt/runtime/test/worksharing/single/omp_single_private_nothreadprivate.c create mode 100755 bolt/runtime/tools/check-depends.pl create mode 100755 bolt/runtime/tools/check-execstack.pl create mode 100755 bolt/runtime/tools/check-instruction-set.pl create mode 100755 bolt/runtime/tools/generate-def.pl create mode 100644 bolt/runtime/tools/lib/Build.pm create mode 100644 bolt/runtime/tools/lib/LibOMP.pm create mode 100644 bolt/runtime/tools/lib/Platform.pm create mode 100644 bolt/runtime/tools/lib/Uname.pm create mode 100644 bolt/runtime/tools/lib/tools.pm create mode 100755 bolt/runtime/tools/message-converter.pl create mode 100644 bolt/runtime/tools/summarizeStats.py create mode 100644 bolt/tools/CMakeLists.txt create mode 100644 bolt/tools/archer/CMakeLists.txt create mode 100644 bolt/tools/archer/README.md create mode 100644 bolt/tools/archer/ompt-tsan.cpp create mode 100644 bolt/tools/archer/tests/CMakeLists.txt create mode 100644 bolt/tools/archer/tests/barrier/barrier.c create mode 100644 bolt/tools/archer/tests/critical/critical.c create mode 100644 bolt/tools/archer/tests/critical/lock-nested.c create mode 100644 bolt/tools/archer/tests/critical/lock.c create mode 100755 bolt/tools/archer/tests/deflake.bash create mode 100644 
bolt/tools/archer/tests/lit.cfg create mode 100644 bolt/tools/archer/tests/lit.site.cfg.in create mode 100644 bolt/tools/archer/tests/ompt/ompt-signal.h create mode 100644 bolt/tools/archer/tests/parallel/parallel-firstprivate.c create mode 100644 bolt/tools/archer/tests/parallel/parallel-simple.c create mode 100644 bolt/tools/archer/tests/parallel/parallel-simple2.c create mode 100644 bolt/tools/archer/tests/races/critical-unrelated.c create mode 100644 bolt/tools/archer/tests/races/lock-nested-unrelated.c create mode 100644 bolt/tools/archer/tests/races/lock-unrelated.c create mode 100644 bolt/tools/archer/tests/races/parallel-simple.c create mode 100644 bolt/tools/archer/tests/races/task-dependency.c create mode 100644 bolt/tools/archer/tests/races/task-taskgroup-unrelated.c create mode 100644 bolt/tools/archer/tests/races/task-taskwait-nested.c create mode 100644 bolt/tools/archer/tests/races/task-two.c create mode 100644 bolt/tools/archer/tests/reduction/parallel-reduction-nowait.c create mode 100644 bolt/tools/archer/tests/reduction/parallel-reduction.c create mode 100644 bolt/tools/archer/tests/task/task-barrier.c create mode 100644 bolt/tools/archer/tests/task/task-create.c create mode 100644 bolt/tools/archer/tests/task/task-dependency.c create mode 100644 bolt/tools/archer/tests/task/task-taskgroup-nested.c create mode 100644 bolt/tools/archer/tests/task/task-taskgroup.c create mode 100644 bolt/tools/archer/tests/task/task-taskwait-nested.c create mode 100644 bolt/tools/archer/tests/task/task-taskwait.c create mode 100644 bolt/tools/archer/tests/worksharing/ordered.c create mode 100644 bolt/tools/multiplex/CMakeLists.txt create mode 100644 bolt/tools/multiplex/README.md create mode 100644 bolt/tools/multiplex/ompt-multiplex.h create mode 100644 bolt/tools/multiplex/tests/CMakeLists.txt create mode 100644 bolt/tools/multiplex/tests/custom_data_storage/custom_data_storage.c create mode 100644 bolt/tools/multiplex/tests/custom_data_storage/first-tool.h 
create mode 100644 bolt/tools/multiplex/tests/custom_data_storage/second-tool.h create mode 100644 bolt/tools/multiplex/tests/lit.cfg create mode 100644 bolt/tools/multiplex/tests/lit.site.cfg.in create mode 100644 bolt/tools/multiplex/tests/ompt-signal.h create mode 100644 bolt/tools/multiplex/tests/print/first-tool.h create mode 100644 bolt/tools/multiplex/tests/print/print.c create mode 100644 bolt/tools/multiplex/tests/print/second-tool.h create mode 100644 bolt/www/README.txt create mode 100644 bolt/www/Reference.pdf create mode 100644 bolt/www/content.css create mode 100644 bolt/www/index.html create mode 100644 bolt/www/menu.css diff --git a/bolt/.github/workflows/CI.yml b/bolt/.github/workflows/CI.yml new file mode 100644 index 0000000000000..39b5bafef5e87 --- /dev/null +++ b/bolt/.github/workflows/CI.yml @@ -0,0 +1,29 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-bolt: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-latest ] + abt: [ yes, no ] + steps: + - uses: actions/checkout@v2 + with: + submodules: 'true' + - name: cmake + run: | + mkdir build + cd build + cmake ../ -DLIBOMP_USE_ARGOBOTS=${{ matrix.abt }} -DOPENMP_ENABLE_WERROR=TRUE + - name: make + run: | + cd build + make -j 2 diff --git a/bolt/.gitignore b/bolt/.gitignore new file mode 100644 index 0000000000000..d4bec15d20574 --- /dev/null +++ b/bolt/.gitignore @@ -0,0 +1,42 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +# +# This file is intentionally different from the output of `git svn show-ignore`, +# as most of those are useless. 
+#==============================================================================# + +#==============================================================================# +# File extensions to be ignored anywhere in the tree. +#==============================================================================# +# Temp files created by most text editors. +*~ +# Merge files created by git. +*.orig +# Byte compiled python modules. +*.pyc +# vim swap files +.*.sw? +.sw? +#OS X specific files. +.DS_store + +#==============================================================================# +# Explicit files to ignore (only matches one). +#==============================================================================# +# Various tag programs +tags +/TAGS +/GPATH +/GRTAGS +/GSYMS +/GTAGS +.gitusers + +#==============================================================================# +# Directories to ignore (do not add trailing '/'s, they skip symlinks). +#==============================================================================# +runtime/exports + +# Nested build directory +/build diff --git a/bolt/.gitmodules b/bolt/.gitmodules new file mode 100644 index 0000000000000..77f211bd4610f --- /dev/null +++ b/bolt/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/argobots"] + path = external/argobots + url = https://github.com/pmodels/argobots diff --git a/bolt/CHANGES.txt b/bolt/CHANGES.txt new file mode 100644 index 0000000000000..a2e29d837d108 --- /dev/null +++ b/bolt/CHANGES.txt @@ -0,0 +1,50 @@ +=============================================================================== + Changes in 1.0 +=============================================================================== + +- Upgraded to LLVM OpenMP 10.0 +- Upgraded Argobots to 1.0 +- Fixed support for untied tasks +- Added tests for OpenMP task and thread scheduling +- Support several platforms including OSX and POWER9. 
+ +=============================================================================== + Changes in 1.0rc3 +=============================================================================== + +- Upgraded Argobots to 1.0rc2 to solve the TLS-related issue +- Fixed support for scheduler sleep + +=============================================================================== + Changes in 1.0rc2 +=============================================================================== + +- Upgraded to LLVM OpenMP 9.0 +- Improved the performance of nested parallel regions +- Support thread affinity + +=============================================================================== + Changes in 1.0rc1 +=============================================================================== + +- Upgraded to LLVM OpenMP 7.0 +- Support task depend and taskloop +- Support OpenMP 4.5 except untied task and cancellation +- Argobots updated to the latest version and integrated as a git submodule + +=============================================================================== + Changes in 1.0b1 +=============================================================================== + +- Fixed missing initialization of some global state +- Fixed bugs related to newer Perl versions +- Updated the embedded Argobots version + +=============================================================================== + Changes in 1.0a1 +=============================================================================== + +# The first release of BOLT, which uses Argobots as a threading layer. + +# Support OpenMP 3.1 + diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt new file mode 100644 index 0000000000000..32b8f7f93d236 --- /dev/null +++ b/bolt/CMakeLists.txt @@ -0,0 +1,106 @@ +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +# Add cmake directory to search for custom cmake functions. +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) + +# llvm/runtimes/ will set OPENMP_STANDALONE_BUILD. 
+if (OPENMP_STANDALONE_BUILD OR "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") + set(OPENMP_STANDALONE_BUILD TRUE) + project(bolt C CXX) + + # CMAKE_BUILD_TYPE was not set, default to Release. + if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) + endif() + + # Group common settings. + set(OPENMP_ENABLE_WERROR FALSE CACHE BOOL + "Enable -Werror flags to turn warnings into errors for supporting compilers.") + set(OPENMP_LIBDIR_SUFFIX "" CACHE STRING + "Suffix of lib installation directory, e.g. 64 => lib64") + # Do not use OPENMP_LIBDIR_SUFFIX directly, use OPENMP_INSTALL_LIBDIR. + set(OPENMP_INSTALL_LIBDIR "lib${OPENMP_LIBDIR_SUFFIX}") + + # Group test settings. + set(OPENMP_TEST_C_COMPILER ${CMAKE_C_COMPILER} CACHE STRING + "C compiler to use for testing OpenMP runtime libraries.") + set(OPENMP_TEST_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING + "C++ compiler to use for testing OpenMP runtime libraries.") + set(OPENMP_LLVM_TOOLS_DIR "" CACHE PATH "Path to LLVM tools for testing.") +else() + set(OPENMP_ENABLE_WERROR ${LLVM_ENABLE_WERROR}) + set(LIBOMP_USE_BOLT_DEFAULT FALSE CACHE BOOL "Use BOLT as a default LLVM OpenMP?") + if (${LIBOMP_USE_BOLT_DEFAULT}) + # If building in tree, we honor the same install suffix LLVM uses. 
+ set(OPENMP_INSTALL_LIBDIR "lib${LLVM_LIBDIR_SUFFIX}") + else() + # If building in tree, we put BOLT libraries in a special directory + set(OPENMP_INSTALL_LIBDIR "lib${LLVM_LIBDIR_SUFFIX}/bolt") + endif() + # Place libraries in "lib/bolt" + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/bolt) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/bolt) + + if (NOT MSVC) + set(OPENMP_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang) + set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++) + else() + set(OPENMP_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe) + set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe) + endif() +endif() + +# Check and set up common compiler flags. +include(config-ix) +include(HandleOpenMPOptions) + +# Set up testing infrastructure. +include(OpenMPTesting) + +set(OPENMP_TEST_FLAGS "" CACHE STRING + "Extra compiler flags to send to the test compiler.") +set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING + "OpenMP compiler flag to use for testing OpenMP runtime libraries.") + +# Build external libraries. +add_subdirectory(external) + +# Build host runtime library. +add_subdirectory(runtime) + + +set(ENABLE_LIBOMPTARGET ON) +# Currently libomptarget cannot be compiled on Windows or MacOS X. +# Since the device plugins are only supported on Linux anyway, +# there is no point in trying to compile libomptarget on other OSes. +if (APPLE OR WIN32 OR NOT OPENMP_HAVE_STD_CPP14_FLAG) + set(ENABLE_LIBOMPTARGET OFF) +endif() + +option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading." + ${ENABLE_LIBOMPTARGET}) +if (OPENMP_ENABLE_LIBOMPTARGET) + # Check that the library can actually be built. 
+ if (APPLE OR WIN32) + message(FATAL_ERROR "libomptarget cannot be built on Windows and MacOS X!") + elseif (NOT OPENMP_HAVE_STD_CPP14_FLAG) + message(FATAL_ERROR "Host compiler must support C++14 to build libomptarget!") + endif() + + add_subdirectory(libomptarget) +endif() + +set(ENABLE_OMPT_TOOLS ON) +# Currently tools are not tested well on Windows or MacOS X. +if (APPLE OR WIN32) + set(ENABLE_OMPT_TOOLS OFF) +endif() + +option(OPENMP_ENABLE_OMPT_TOOLS "Enable building ompt based tools for OpenMP." + ${ENABLE_OMPT_TOOLS}) +if (OPENMP_ENABLE_OMPT_TOOLS) + add_subdirectory(tools) +endif() + +# Now that we have seen all testsuites, create the check-openmp target. +construct_check_openmp_target() diff --git a/bolt/CREDITS.txt b/bolt/CREDITS.txt new file mode 100644 index 0000000000000..ede45b10fea25 --- /dev/null +++ b/bolt/CREDITS.txt @@ -0,0 +1,65 @@ +This file is a partial list of people who have contributed to the LLVM/openmp +project. If you have contributed a patch or made some other contribution to +LLVM/openmp, please submit a patch to this file to add yourself, and it will be +done! + +The list is sorted by surname and formatted to allow easy grepping and +beautification by scripts. The fields are: name (N), email (E), web-address +(W), PGP key ID and fingerprint (P), description (D), and snail-mail address +(S). + +N: Adam Azarchs +W: 10xgenomics.com +D: Bug fix for lock code + +N: Carlo Bertolli +W: http://ibm.com +D: IBM contributor to PowerPC support in CMake files and elsewhere. + +N: Diego Caballero +E: diego.l.caballero@gmail.com +D: Fork performance improvements + +N: Sunita Chandrasekaran +D: Contributor to testsuite from OpenUH + +N: Barbara Chapman +D: Contributor to testsuite from OpenUH + +N: University of Houston +W: http://web.cs.uh.edu/~openuh/download/ +D: OpenUH test suite + +N: Intel Corporation OpenMP runtime team +W: http://openmprtl.org +D: Created the runtime. 
+ +N: John Mellor-Crummey and other members of the OpenMP Tools Working Group +E: johnmc@rice.edu +D: OpenMP Tools Interface (OMPT) + +N: Matthias Muller +D: Contributor to testsuite from OpenUH + +N: Tal Nevo +E: tal@scalemp.com +D: ScaleMP contributor to improve runtime performance there. +W: http://scalemp.com + +N: Pavel Neytchev +D: Contributor to testsuite from OpenUH + +N: Steven Noonan +E: steven@uplinklabs.net +D: Patches for the ARM architecture and removal of several inconsistencies. + +N: Joachim Protze +E: protze@itc.rwth-aachen.de +D: OpenMP Tools Interface, Archer tool + +N: Alp Toker +E: alp@nuanti.com +D: Making build work for FreeBSD. + +N: Cheng Wang +D: Contributor to testsuite from OpenUH diff --git a/bolt/LICENSE.txt b/bolt/LICENSE.txt new file mode 100644 index 0000000000000..2153a1bab8f52 --- /dev/null +++ b/bolt/LICENSE.txt @@ -0,0 +1,397 @@ +============================================================================== +BOLT is a derivative of the Intel OpenMP runtime. The original pieces of the +code from the Intel OpenMP runtime are copyrighted to Intel, and the pieces +modified for BOLT are copyrighted to UChicago Argonne, LLC. +============================================================================== + Copyright (c) 2016, UChicago Argonne, LLC + All Rights Reserved + BOLT: OpenMP over Lightweight Threads, SF-16-140 + OPEN SOURCE LICENSE +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. Software changes, + modifications, or derivative works, should be noted with comments and the + author and organization's name. +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the names of UChicago Argonne, LLC or the Department of Energy nor + the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. +4. The software and the end-user documentation included with the + redistribution, if any, must include the following acknowledgment: + "This product includes software produced by UChicago Argonne, LLC under + Contract No. DE-AC02-06CH11357 with the Department of Energy." +****************************************************************************** + DISCLAIMER + THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. +NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF +ENERGY, NOR UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY +WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY +FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, +APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT +INFRINGE PRIVATELY OWNED RIGHTS. +****************************************************************************** + +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== + +The software contained in this directory tree is dual licensed under both the +University of Illinois "BSD-Like" license and the MIT license. As a user of +this code you may choose to use it under either license. As a contributor, +you agree to allow your code to be used under both. The full text of the +relevant licenses is included below. + +In addition, a license agreement from the copyright/patent holders of the +software contained in this directory tree is included below. 
+ +============================================================================== + +University of Illinois/NCSA +Open Source License + +Copyright (c) 1997-2019 Intel Corporation + +All rights reserved. + +Developed by: + OpenMP Runtime Team + Intel Corporation + http://www.openmprtl.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of Intel Corporation OpenMP Runtime Team nor the + names of its contributors may be used to endorse or promote products + derived from this Software without specific prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. 
+ +============================================================================== + +Copyright (c) 1997-2019 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +============================================================================== + +Intel Corporation + +Software Grant License Agreement ("Agreement") + +Except for the license granted herein to you, Intel Corporation ("Intel") reserves +all right, title, and interest in and to the Software (defined below). + +Definition + +"Software" means the code and documentation as well as any original work of +authorship, including any modifications or additions to an existing work, that +is intentionally submitted by Intel to llvm.org (http://llvm.org) ("LLVM") for +inclusion in, or documentation of, any of the products owned or managed by LLVM +(the "Work"). 
For the purposes of this definition, "submitted" means any form of +electronic, verbal, or written communication sent to LLVM or its +representatives, including but not limited to communication on electronic +mailing lists, source code control systems, and issue tracking systems that are +managed by, or on behalf of, LLVM for the purpose of discussing and improving +the Work, but excluding communication that is conspicuously marked otherwise. + +1. Grant of Copyright License. Subject to the terms and conditions of this + Agreement, Intel hereby grants to you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable copyright license to reproduce, prepare derivative + works of, publicly display, publicly perform, sublicense, and distribute the + Software and such derivative works. + +2. Grant of Patent License. Subject to the terms and conditions of this + Agreement, Intel hereby grants you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable (except as stated in this section) patent license + to make, have made, use, offer to sell, sell, import, and otherwise transfer + the Work, where such license applies only to those patent claims licensable + by Intel that are necessarily infringed by Intel's Software alone or by + combination of the Software with the Work to which such Software was + submitted. If any entity institutes patent litigation against Intel or any + other entity (including a cross-claim or counterclaim in a lawsuit) alleging + that Intel's Software, or the Work to which Intel has contributed constitutes + direct or contributory patent infringement, then any patent licenses granted + to that entity under this Agreement for the Software or Work shall terminate + as of the date such litigation is filed. 
+ +Unless required by applicable law or agreed to in writing, the software is +provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +either express or implied, including, without limitation, any warranties or +conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. + +============================================================================== diff --git a/bolt/README.md b/bolt/README.md new file mode 100644 index 0000000000000..6776e77109468 --- /dev/null +++ b/bolt/README.md @@ -0,0 +1,263 @@ +# BOLT: OpenMP over Lightweight Threads + +BOLT targets a high-performing OpenMP implementation, especially specialized +for fine-grain parallelism. BOLT utilizes a lightweight threading model for +its underlying threading mechanism. It currently adopts Argobots, a new +holistic, low-level threading and tasking runtime, in order to overcome +shortcomings of conventional OS-level threads. The current BOLT implementation +is based on the OpenMP runtime in LLVM, and thus it can be used with +LLVM/Clang, Intel OpenMP compiler, and GCC. More information about BOLT can be +found at http://www.bolt-omp.org. + + +1. Getting Started +2. Testing BOLT +3. BOLT-Specific Environmental Variables +4. Reporting Problems +5. Alternate Build Options + + +------------------------------------------------------------------------------- + +1. Getting Started +================== + +The following instructions take you through a sequence of steps to get the +default configuration of BOLT up and running. + +Henceforth, VERSION indicates the version number of the release tarball. + +(a) You will need the following prerequisites. 
+ + - REQUIRED: This tar file bolt-VERSION.tar.gz + + - REQUIRED: C and C++ compilers (gcc and g++ are sufficient) + + - REQUIRED: CMake (http://www.cmake.org/download) + + - OPTIONAL: Argobots (http://www.argobots.org) + The BOLT release tarball includes the Argobots source code, and + thus you can build BOLT together with the built-in Argobots. + Of course, you can use your own Argobots build instead of the + bundled one. In the latter case, we assume Argobots has + been installed in /home/USERNAME/argobots-install. + + Also, you need to know which shell you are using, since different shells have + different command syntax. The command "echo $SHELL" prints out the current shell + used by your terminal program. + + Note: if you obtained BOLT via GitHub, the following commands download the + built-in Argobots from the Argobots repository. + + git submodule init + git submodule update + +(b) Unpack the tar file and create a build directory: + + tar xzf bolt-VERSION.tar.gz + mkdir bolt-build + cd bolt-build + + If your tar doesn't accept the z option, use + + gunzip bolt-VERSION.tar.gz + tar xf bolt-VERSION.tar + mkdir bolt-build + cd bolt-build + +(c) Choose an installation directory, say /home/USERNAME/bolt-install, which is +assumed to be non-existent or empty. 
+ +(d) Configure BOLT specifying the installation directory: + + If you want to use the built-in Argobots, + + for csh and tcsh: + + cmake ../bolt-VERSION -G "Unix Makefiles" \ + -DCMAKE_INSTALL_PREFIX=/home/USERNAME/bolt-install \ + -DCMAKE_C_COMPILER=<C compiler> \ + -DCMAKE_CXX_COMPILER=<C++ compiler> \ + -DOPENMP_TEST_C_COMPILER=<C compiler for testing> \ + -DOPENMP_TEST_CXX_COMPILER=<C++ compiler for testing> \ + -DCMAKE_BUILD_TYPE=Release \ + -DLIBOMP_USE_ARGOBOTS=on \ + |& tee c.txt + + for bash and sh: + + cmake ../bolt-VERSION -G "Unix Makefiles" \ + -DCMAKE_INSTALL_PREFIX=/home/USERNAME/bolt-install \ + -DCMAKE_C_COMPILER=<C compiler> \ + -DCMAKE_CXX_COMPILER=<C++ compiler> \ + -DOPENMP_TEST_C_COMPILER=<C compiler for testing> \ + -DOPENMP_TEST_CXX_COMPILER=<C++ compiler for testing> \ + -DCMAKE_BUILD_TYPE=Release \ + -DLIBOMP_USE_ARGOBOTS=on \ + 2>&1 | tee c.txt + + If you want to use your own Argobots build, + + for csh and tcsh: + + cmake ../bolt-VERSION -G "Unix Makefiles" \ + -DCMAKE_INSTALL_PREFIX=/home/USERNAME/bolt-install \ + -DCMAKE_C_COMPILER=<C compiler> \ + -DCMAKE_CXX_COMPILER=<C++ compiler> \ + -DOPENMP_TEST_C_COMPILER=<C compiler for testing> \ + -DOPENMP_TEST_CXX_COMPILER=<C++ compiler for testing> \ + -DCMAKE_BUILD_TYPE=Release \ + -DLIBOMP_USE_ARGOBOTS=on \ + -DLIBOMP_ARGOBOTS_INSTALL_DIR=/home/USERNAME/argobots-install \ + |& tee c.txt + + for bash and sh: + + cmake ../bolt-VERSION -G "Unix Makefiles" \ + -DCMAKE_INSTALL_PREFIX=/home/USERNAME/bolt-install \ + -DCMAKE_C_COMPILER=<C compiler> \ + -DCMAKE_CXX_COMPILER=<C++ compiler> \ + -DOPENMP_TEST_C_COMPILER=<C compiler for testing> \ + -DOPENMP_TEST_CXX_COMPILER=<C++ compiler for testing> \ + -DCMAKE_BUILD_TYPE=Release \ + -DLIBOMP_USE_ARGOBOTS=on \ + -DLIBOMP_ARGOBOTS_INSTALL_DIR=/home/USERNAME/argobots-install \ + 2>&1 | tee c.txt + + Bourne-like shells, sh and bash, accept "2>&1 |". Csh-like shells, csh and + tcsh, accept "|&". If a failure occurs, the cmake command will display the + error. Most errors are straightforward to follow. + +(e) Build BOLT: + + for csh and tcsh: + + make |& tee m.txt + + for bash and sh: + + make 2>&1 | tee m.txt + + This step should succeed if there were no problems with the preceding step. + Check file m.txt. 
If there were problems, do a "make clean" and then run + make again with V=1 and VERBOSE=1. + + make V=1 VERBOSE=1 |& tee m.txt (for csh and tcsh) + + OR + + make V=1 VERBOSE=1 2>&1 | tee m.txt (for bash and sh) + + Then go to Section 4 below, for reporting the issue to the BOLT developers and + other users. + +(f) Install BOLT: + + for csh and tcsh: + + make install |& tee mi.txt + + for bash and sh: + + make install 2>&1 | tee mi.txt + + This step collects all required header and library files in the directory + specified by the prefix argument to cmake. + +------------------------------------------------------------------------------- + +2. Testing BOLT +=============== + +To test BOLT, you can run the test suite. Compilers for testing must be +specified when you run cmake. + +For example, if llvm-lit is installed: + + cd bolt-build + NUM_PARALLEL_TESTS=16 + llvm-lit runtime/test -v -j $NUM_PARALLEL_TESTS --timeout 600 + +If you run into any problems when running the test suite, please follow Section 4 +below for reporting them to the BOLT developers and other users. + +------------------------------------------------------------------------------- + +3. BOLT-Specific Environmental Variables +======================================== + +BOLT exposes several environment variables specific to BOLT. + + KMP_ABT_NUM_ESS=<int>: Set the number of execution streams which are + running on OS-level threads (e.g., Pthreads). + KMP_ABT_SCHED_SLEEP=<1|0>: If it is set to 1, put a scheduler to sleep when its + associated pools are empty. + KMP_ABT_VERBOSE=<1|0>: If it is set to 1, print all the BOLT-specific + parameters on runtime initialization. + KMP_ABT_FORK_CUTOFF=<int>: Set the cut-off threshold used for a + divide-and-conquer thread creation. + KMP_ABT_FORK_NUM_WAYS=<int>: Set the number of ways for a + divide-and-conquer thread creation. + KMP_ABT_SCHED_MIN_SLEEP_NSEC=<int>: Set the minimum scheduler sleep time + (nanoseconds). + KMP_ABT_SCHED_MAX_SLEEP_NSEC=<int>: Set the maximum scheduler sleep time + (nanoseconds). 
+ KMP_ABT_SCHED_EVENT_FREQ=<int>: Set the event-checking frequency of + schedulers. + KMP_ABT_WORK_STEAL_FREQ=<int>: Set the random work-stealing frequency of + schedulers. + +------------------------------------------------------------------------------- + +4. Reporting Problems +===================== + +If you have problems with the installation or usage of BOLT, please follow +these steps: + +(a) First visit the Frequently Asked Questions (FAQ) page at +https://github.com/pmodels/bolt/wiki/FAQ +to see if the problem you are facing has a simple solution. + +(b) If you cannot find an answer on the FAQ page, look through previous +email threads on the discuss@bolt-omp.org mailing list archive +(https://lists.bolt-omp.org/mailman/listinfo/discuss). It is likely +someone else had a similar problem, which has already been resolved +before. + +(c) If neither of the above steps works, please send an email to +discuss@bolt-omp.org. You need to subscribe to this list +(https://lists.bolt-omp.org/mailman/listinfo/discuss) before sending +an email. + +Your email should contain the following files. PLEASE +COMPRESS THEM BEFORE SENDING, AS THE FILES CAN BE LARGE. Note that, +depending on the step at which the build failed, some of the files might not +exist. + + bolt-build/c.txt (generated in step 1(d) above) + bolt-build/m.txt (generated in step 1(e) above) + bolt-build/mi.txt (generated in step 1(f) above) + + DID WE MENTION? DO NOT FORGET TO COMPRESS THESE FILES! + +Finally, please include the actual error you are seeing when running +the application. If possible, please try to reproduce the error with +a smaller application or benchmark and send that along in your bug +report. + +(d) If you have found a bug in BOLT, we request that you report it +at our GitHub issues page (https://github.com/pmodels/bolt/issues). +Even if you believe you have found a bug, we recommend that you send an +email to discuss@bolt-omp.org first. 
+ +------------------------------------------------------------------------------- + +5. Alternate Build Options +============================== + +BOLT is based on the OpenMP subproject of LLVM for runtime, and thus it uses +the same build options provided in LLVM. + +Please visit http://openmp.llvm.org/ for more build options. + diff --git a/bolt/README.rst b/bolt/README.rst new file mode 100644 index 0000000000000..e46ed1a1a294b --- /dev/null +++ b/bolt/README.rst @@ -0,0 +1,340 @@ +======================================== +How to Build the LLVM* OpenMP* Libraries +======================================== +This repository requires `CMake `_ v2.8.0 or later. LLVM +and Clang need a more recent version which also applies for in-tree builds. For +more information than available in this document please see +`LLVM's CMake documentation `_ and the +`official documentation `_. + +.. contents:: + :local: + +How to Call CMake Initially, then Repeatedly +============================================ +- When calling CMake for the first time, all needed compiler options must be + specified on the command line. After this initial call to CMake, the compiler + definitions must not be included for further calls to CMake. Other options + can be specified on the command line multiple times including all definitions + in the build options section below. +- Example of configuring, building, reconfiguring, rebuilding: + + .. code-block:: console + + $ mkdir build + $ cd build + $ cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ .. # Initial configuration + $ make + ... + $ make clean + $ cmake -DCMAKE_BUILD_TYPE=Debug .. # Second configuration + $ make + ... + $ rm -rf * + $ cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. # Third configuration + $ make + +- Notice in the example how the compiler definitions are only specified for an + empty build directory, but other build options are used at any time. 
+- The file ``CMakeCache.txt`` which is created after the first call to CMake is + a configuration file which holds all values for the build options. These + values can be changed using a text editor to modify ``CMakeCache.txt`` as + opposed to using definitions on the command line. +- To have CMake create a particular type of build generator file simply include + the ``-G `` option: + + .. code-block:: console + + $ cmake -G "Unix Makefiles" ... + + You can see a list of generators CMake supports by executing the cmake command + with no arguments. + +Instructions to Build +===================== +.. code-block:: console + + $ cd openmp_top_level/ [ this directory with libomptarget/, runtime/, etc. ] + $ mkdir build + $ cd build + + [ Unix* Libraries ] + $ cmake -DCMAKE_C_COMPILER= -DCMAKE_CXX_COMPILER= .. + + [ Windows* Libraries ] + $ cmake -G -DCMAKE_C_COMPILER= -DCMAKE_CXX_COMPILER= -DCMAKE_ASM_MASM_COMPILER=[ml | ml64] -DCMAKE_BUILD_TYPE=Release .. + + $ make + $ make install + +CMake Options +============= +Builds with CMake can be customized by means of options as already seen above. +One possibility is to pass them via the command line: + +.. code-block:: console + + $ cmake -DOPTION= path/to/source + +.. note:: The first value listed is the respective default for that option. + +Generic Options +--------------- +For full documentation consult the CMake manual or execute +``cmake --help-variable VARIABLE_NAME`` to get information about a specific +variable. + +**CMAKE_BUILD_TYPE** = ``Release|Debug|RelWithDebInfo`` + Build type can be ``Release``, ``Debug``, or ``RelWithDebInfo`` which chooses + the optimization level and presence of debugging symbols. + +**CMAKE_C_COMPILER** = + Specify the C compiler. + +**CMAKE_CXX_COMPILER** = + Specify the C++ compiler. + +**CMAKE_Fortran_COMPILER** = + Specify the Fortran compiler. This option is only needed when + **LIBOMP_FORTRAN_MODULES** is ``ON`` (see below). 
So typically, a Fortran + compiler is not needed during the build. + +**CMAKE_ASM_MASM_COMPILER** = ``ml|ml64`` + This option is only relevant for Windows*. + +Options for all Libraries +------------------------- + +**OPENMP_ENABLE_WERROR** = ``OFF|ON`` + Treat warnings as errors and fail, if a compiler warning is triggered. + +**OPENMP_LIBDIR_SUFFIX** = ``""`` + Extra suffix to append to the directory where libraries are to be installed. + +**OPENMP_TEST_C_COMPILER** = ``${CMAKE_C_COMPILER}`` + Compiler to use for testing. Defaults to the compiler that was also used for + building. + +**OPENMP_TEST_CXX_COMPILER** = ``${CMAKE_CXX_COMPILER}`` + Compiler to use for testing. Defaults to the compiler that was also used for + building. + +**OPENMP_LLVM_TOOLS_DIR** = ``/path/to/built/llvm/tools`` + Additional path to search for LLVM tools needed by tests. + +**OPENMP_LLVM_LIT_EXECUTABLE** = ``/path/to/llvm-lit`` + Specify full path to ``llvm-lit`` executable for running tests. The default + is to search the ``PATH`` and the directory in **OPENMP_LLVM_TOOLS_DIR**. + +**OPENMP_FILECHECK_EXECUTABLE** = ``/path/to/FileCheck`` + Specify full path to ``FileCheck`` executable for running tests. The default + is to search the ``PATH`` and the directory in **OPENMP_LLVM_TOOLS_DIR**. + +**OPENMP_NOT_EXECUTABLE** = ``/path/to/not`` + Specify full path to ``not`` executable for running tests. The default + is to search the ``PATH`` and the directory in **OPENMP_LLVM_TOOLS_DIR**. + +Options for ``libomp`` +---------------------- + +**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` + The default value for this option is chosen based on probing the compiler for + architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). + +**LIBOMP_MIC_ARCH** = ``knc|knf`` + Intel(R) Many Integrated Core Architecture (Intel(R) MIC Architecture) to + build for. This value is ignored if **LIBOMP_ARCH** does not equal ``mic``. 
+ +**LIBOMP_LIB_TYPE** = ``normal|profile|stubs`` + Library type can be ``normal``, ``profile``, or ``stubs``. + +**LIBOMP_USE_VERSION_SYMBOLS** = ``ON|OFF`` + Use versioned symbols for building the library. This option only makes sense + for ELF based libraries where version symbols are supported (Linux*, some BSD* + variants). It is ``OFF`` by default for Windows* and macOS*, but ``ON`` for + other Unix based operating systems. + +**LIBOMP_ENABLE_SHARED** = ``ON|OFF`` + Build a shared library. If this option is ``OFF``, static OpenMP libraries + will be built instead of dynamic ones. + + .. note:: + + Static libraries are not supported on Windows*. + +**LIBOMP_FORTRAN_MODULES** = ``OFF|ON`` + Create the Fortran modules (requires Fortran compiler). + +macOS* Fat Libraries +"""""""""""""""""""" +On macOS* machines, it is possible to build universal (or fat) libraries which +include both i386 and x86_64 architecture objects in a single archive. + +.. code-block:: console + + $ cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_OSX_ARCHITECTURES='i386;x86_64' .. + $ make + +There is also an option **LIBOMP_OSX_ARCHITECTURES** which can be set in case +this is an LLVM source tree build. It will only apply for the ``libomp`` library +and avoids having the entire LLVM/Clang build produce universal binaries. + +Optional Features +""""""""""""""""" + +**LIBOMP_USE_ADAPTIVE_LOCKS** = ``ON|OFF`` + Include adaptive locks, based on Intel(R) Transactional Synchronization + Extensions (Intel(R) TSX). This feature is x86 specific and turned ``ON`` + by default for IA-32 architecture and Intel(R) 64 architecture. + +**LIBOMP_USE_INTERNODE_ALIGNMENT** = ``OFF|ON`` + Align certain data structures on 4096-byte boundaries. This option is useful on + multi-node systems where a small ``CACHE_LINE`` setting leads to false sharing. + +**LIBBOLT_OMPT_SUPPORT** = ``ON|OFF`` + Include support for the OpenMP Tools Interface (OMPT). 
+ This option is supported and ``ON`` by default for x86, x86_64, AArch64, + PPC64 and RISCV64 on Linux* and macOS*. + This option is ``OFF`` if this feature is not supported for the platform. + +**LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` + Include support for optional OMPT functionality. This option is ignored if + **LIBBOLT_OMPT_SUPPORT** is ``OFF``. + +**LIBOMP_STATS** = ``OFF|ON`` + Include stats-gathering code. + +**LIBOMP_USE_DEBUGGER** = ``OFF|ON`` + Include the friendly debugger interface. + +**LIBOMP_USE_HWLOC** = ``OFF|ON`` + Use `OpenMPI's hwloc library `_ for + topology detection and affinity. + +**LIBOMP_HWLOC_INSTALL_DIR** = ``/path/to/hwloc/install/dir`` + Specify install location of hwloc. The configuration system will look for + ``hwloc.h`` in ``${LIBOMP_HWLOC_INSTALL_DIR}/include`` and the library in + ``${LIBOMP_HWLOC_INSTALL_DIR}/lib``. The default is ``/usr/local``. + This option is only used if **LIBOMP_USE_HWLOC** is ``ON``. + +Additional Compiler Flags +""""""""""""""""""""""""" + +These flags are **appended**, they do not overwrite any of the preset flags. + +**LIBOMP_CPPFLAGS** = + Additional C preprocessor flags. + +**LIBOMP_CXXFLAGS** = + Additional C++ compiler flags. + +**LIBOMP_ASMFLAGS** = + Additional assembler flags. + +**LIBOMP_LDFLAGS** = + Additional linker flags. + +**LIBOMP_LIBFLAGS** = + Additional libraries to link. + +**LIBOMP_FFLAGS** = + Additional Fortran compiler flags. + +Options for ``libomptarget`` +---------------------------- + +**LIBOMPTARGET_OPENMP_HEADER_FOLDER** = ``""`` + Path of the folder that contains ``omp.h``. This is required for testing + out-of-tree builds. + +**LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER** = ``""`` + Path of the folder that contains ``libomp.so``. This is required for testing + out-of-tree builds. + +Options for ``NVPTX device RTL`` +-------------------------------- + +**LIBOMPTARGET_NVPTX_ENABLE_BCLIB** = ``ON|OFF`` + Enable CUDA LLVM bitcode offloading device RTL. 
This is used for link-time + optimization of the OMP runtime and application code. This option is enabled + by default if the build system determines that ``CMAKE_C_COMPILER`` is able to + compile and link the library. + +**LIBOMPTARGET_NVPTX_CUDA_COMPILER** = ``""`` + Location of a CUDA compiler capable of emitting LLVM bitcode. Currently only + the Clang compiler is supported. This is only used when building the CUDA LLVM + bitcode offloading device RTL. If unspecified and the CMake C compiler is + Clang, then Clang is used. + +**LIBOMPTARGET_NVPTX_BC_LINKER** = ``""`` + Location of a linker capable of linking LLVM bitcode objects. This is only + used when building the CUDA LLVM bitcode offloading device RTL. If unspecified + and the CMake C compiler is Clang and there exists an llvm-link binary in the + directory containing Clang, then this llvm-link binary is used. + +**LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER** = ``""`` + Host compiler to use with NVCC. This compiler is not going to be used to + produce any binary. Instead, this is used to overcome the input compiler + checks done by NVCC. E.g. if using a default host compiler that is not + compatible with NVCC, this option can be used to pass to NVCC a valid compiler + to avoid the error. + +**LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES** = ``35`` + List of CUDA compute capabilities that should be supported by the NVPTX + device RTL. E.g. for compute capabilities 6.0 and 7.0, the option "60,70" + should be used. Compute capability 3.5 is the minimum required. + +**LIBOMPTARGET_NVPTX_DEBUG** = ``OFF|ON`` + Enable printing of debug messages from the NVPTX device RTL. + +Example Usages of CMake +======================= + +Typical Invocations +------------------- + +.. code-block:: console + + $ cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ .. + $ cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. + $ cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc .. 
+ +Advanced Builds with Various Options +------------------------------------ + +- Build the i386 Linux* library using GCC* + + .. code-block:: console + + $ cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DLIBOMP_ARCH=i386 .. + +- Build the x86_64 debug Mac library using Clang* + + .. code-block:: console + + $ cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DLIBOMP_ARCH=x86_64 -DCMAKE_BUILD_TYPE=Debug .. + +- Build the library (architecture determined by probing compiler) using the + Intel(R) C Compiler and the Intel(R) C++ Compiler. Also, create Fortran + modules with the Intel(R) Fortran Compiler. + + .. code-block:: console + + $ cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DCMAKE_Fortran_COMPILER=ifort -DLIBOMP_FORTRAN_MODULES=on .. + +- Have CMake find the C/C++ compiler and specify additional flags for the + preprocessor and C++ compiler. + + .. code-block:: console + + $ cmake -DLIBOMP_CPPFLAGS='-DNEW_FEATURE=1 -DOLD_FEATURE=0' -DLIBOMP_CXXFLAGS='--one-specific-flag --two-specific-flag' .. + +- Build the stubs library + + .. code-block:: console + + $ cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DLIBOMP_LIB_TYPE=stubs .. + +**Footnotes** + +.. [*] Other names and brands may be claimed as the property of others. 
diff --git a/bolt/cmake/DetectTestCompiler/CMakeLists.txt b/bolt/cmake/DetectTestCompiler/CMakeLists.txt new file mode 100644 index 0000000000000..c230fc8d4cf1a --- /dev/null +++ b/bolt/cmake/DetectTestCompiler/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 2.8) +project(DetectTestCompiler C CXX) + +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) + +function(write_compiler_information lang) # Writes path, ID, version, flags and TSan result of the ${lang} compiler as one ;-separated record. + set(information "${CMAKE_${lang}_COMPILER}") + set(information "${information}\\;${CMAKE_${lang}_COMPILER_ID}") + set(information "${information}\\;${CMAKE_${lang}_COMPILER_VERSION}") + set(information "${information}\\;${${lang}_FLAGS}") + set(information "${information}\\;${${lang}_HAS_TSAN_FLAG}") + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${lang}CompilerInformation.txt ${information}) +endfunction(write_compiler_information) + +find_package(OpenMP) +if (NOT OpenMP_FOUND AND NOT OPENMP_FOUND) # Variable names are case-sensitive; fall back to -fopenmp only when FindOpenMP really failed. + set(OpenMP_C_FLAGS "-fopenmp") + set(OpenMP_CXX_FLAGS "-fopenmp") +endif() + +set(CMAKE_THREAD_PREFER_PTHREAD TRUE) +set(THREADS_PREFER_PTHREAD_FLAG TRUE) +find_package(Threads REQUIRED) + +set(C_FLAGS "${OpenMP_C_FLAGS} ${CMAKE_THREAD_LIBS_INIT}") +set(CXX_FLAGS "${OpenMP_CXX_FLAGS} ${CMAKE_THREAD_LIBS_INIT}") + +# TODO: Implement blockaddress in GlobalISel and remove this flag! 
+if (CMAKE_C_COMPILER_ID STREQUAL "Clang") + check_c_compiler_flag("-fno-experimental-isel" C_HAS_EXPERIMENTAL_ISEL_FLAG) + check_cxx_compiler_flag("-fno-experimental-isel" CXX_HAS_EXPERIMENTAL_ISEL_FLAG) + macro(add_experimental_isel_flag lang) # Prepends the flag to ${lang}_FLAGS when the ${lang} compiler accepted it. + if (${lang}_HAS_EXPERIMENTAL_ISEL_FLAG) + set(${lang}_FLAGS "-fno-experimental-isel ${${lang}_FLAGS}") + endif() + endmacro(add_experimental_isel_flag) + + add_experimental_isel_flag(C) + add_experimental_isel_flag(CXX) +endif() + +SET(CMAKE_REQUIRED_FLAGS "-fsanitize=thread") # Used by the two checks below, which therefore pass an empty flag. +check_c_compiler_flag("" C_HAS_TSAN_FLAG) +check_cxx_compiler_flag("" CXX_HAS_TSAN_FLAG) + +write_compiler_information(C) +write_compiler_information(CXX) diff --git a/bolt/cmake/HandleOpenMPOptions.cmake b/bolt/cmake/HandleOpenMPOptions.cmake new file mode 100644 index 0000000000000..15382bcf12de6 --- /dev/null +++ b/bolt/cmake/HandleOpenMPOptions.cmake @@ -0,0 +1,35 @@ +if (OPENMP_STANDALONE_BUILD) + # From HandleLLVMOptions.cmake: appends ${value} to every variable named in ARGN when ${condition} is true. + function(append_if condition value) + if (${condition}) + foreach(variable ${ARGN}) + set(${variable} "${${variable}} ${value}" PARENT_SCOPE) + endforeach(variable) + endif() + endfunction() +endif() + +# MSVC and clang-cl in compatibility mode map -Wall to -Weverything. +# TODO: LLVM adds /W4 instead, check if that works for the OpenMP runtimes. +if (NOT MSVC) + append_if(OPENMP_HAVE_WALL_FLAG "-Wall" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +endif() +if (OPENMP_ENABLE_WERROR) + append_if(OPENMP_HAVE_WERROR_FLAG "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +endif() + +# Additional warnings that are not enabled by -Wall. 
+append_if(OPENMP_HAVE_WCAST_QUAL_FLAG "-Wcast-qual" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +append_if(OPENMP_HAVE_WFORMAT_PEDANTIC_FLAG "-Wformat-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +append_if(OPENMP_HAVE_WIMPLICIT_FALLTHROUGH_FLAG "-Wimplicit-fallthrough" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +append_if(OPENMP_HAVE_WSIGN_COMPARE_FLAG "-Wsign-compare" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + +# Warnings that we want to disable because they are too verbose or fragile. +append_if(OPENMP_HAVE_WNO_EXTRA_FLAG "-Wno-extra" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +append_if(OPENMP_HAVE_WNO_PEDANTIC_FLAG "-Wno-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +append_if(OPENMP_HAVE_WNO_MAYBE_UNINITIALIZED_FLAG "-Wno-maybe-uninitialized" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + +append_if(OPENMP_HAVE_STD_GNUPP14_FLAG "-std=gnu++14" CMAKE_CXX_FLAGS) # Prefer gnu++14; plain c++14 is the fallback below. +if (NOT OPENMP_HAVE_STD_GNUPP14_FLAG) + append_if(OPENMP_HAVE_STD_CPP14_FLAG "-std=c++14" CMAKE_CXX_FLAGS) +endif() diff --git a/bolt/cmake/OpenMPTesting.cmake b/bolt/cmake/OpenMPTesting.cmake new file mode 100644 index 0000000000000..51bdea977b456 --- /dev/null +++ b/bolt/cmake/OpenMPTesting.cmake @@ -0,0 +1,220 @@ +# Keep track of whether we have all dependencies. +set(ENABLE_CHECK_BOLT_TARGETS TRUE) + +# Function to find required dependencies for testing (Python, llvm-lit, FileCheck, not); sets ENABLE_CHECK_BOLT_TARGETS to FALSE in the caller's scope and returns at the first one missing. +function(find_standalone_test_dependencies) + include(FindPythonInterp) + + if (NOT PYTHONINTERP_FOUND) + message(STATUS "Could not find Python.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_BOLT_TARGETS FALSE PARENT_SCOPE) + return() + endif() + + # Find executables. 
+ find_program(OPENMP_LLVM_LIT_EXECUTABLE + NAMES llvm-lit lit.py lit + PATHS ${OPENMP_LLVM_TOOLS_DIR}) + if (NOT OPENMP_LLVM_LIT_EXECUTABLE) + message(STATUS "Cannot find llvm-lit.") + message(STATUS "Please put llvm-lit in your PATH, set OPENMP_LLVM_LIT_EXECUTABLE to its full path, or point OPENMP_LLVM_TOOLS_DIR to its directory.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_BOLT_TARGETS FALSE PARENT_SCOPE) + return() + endif() + + find_program(OPENMP_FILECHECK_EXECUTABLE + NAMES FileCheck + PATHS ${OPENMP_LLVM_TOOLS_DIR}) + if (NOT OPENMP_FILECHECK_EXECUTABLE) + message(STATUS "Cannot find FileCheck.") + message(STATUS "Please put FileCheck in your PATH, set OPENMP_FILECHECK_EXECUTABLE to its full path, or point OPENMP_LLVM_TOOLS_DIR to its directory.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_BOLT_TARGETS FALSE PARENT_SCOPE) + return() + endif() + + find_program(OPENMP_NOT_EXECUTABLE + NAMES not + PATHS ${OPENMP_LLVM_TOOLS_DIR}) + if (NOT OPENMP_NOT_EXECUTABLE) + message(STATUS "Cannot find 'not'.") + message(STATUS "Please put 'not' in your PATH, set OPENMP_NOT_EXECUTABLE to its full path, or point OPENMP_LLVM_TOOLS_DIR to its directory.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_BOLT_TARGETS FALSE PARENT_SCOPE) + return() + endif() +endfunction() + +if (${OPENMP_STANDALONE_BUILD}) + find_standalone_test_dependencies() + + # Make sure we can use the console pool for recent CMake and Ninja > 1.5; older CMake gets an empty placeholder instead of USES_TERMINAL. + if (CMAKE_VERSION VERSION_LESS 3.1.20141117) + set(cmake_3_2_USES_TERMINAL) + else() + set(cmake_3_2_USES_TERMINAL USES_TERMINAL) + endif() + + # Set lit arguments. 
+ set(DEFAULT_LIT_ARGS "-sv --show-unsupported --show-xfail") + if (MSVC OR XCODE) + set(DEFAULT_LIT_ARGS "${DEFAULT_LIT_ARGS} --no-progress-bar") + endif() + set(OPENMP_LIT_ARGS "${DEFAULT_LIT_ARGS}" CACHE STRING "Options for lit.") + separate_arguments(OPENMP_LIT_ARGS) +else() + set(OPENMP_FILECHECK_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/FileCheck) + set(OPENMP_NOT_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/not) +endif() + +# Macro to extract information about compiler from file (no own scope, so the variables are set in the caller). The file is read as a list: path, id, version, OpenMP flags, TSan flag result. +macro(extract_test_compiler_information lang file) + file(READ ${file} information) + list(GET information 0 path) + list(GET information 1 id) + list(GET information 2 version) + list(GET information 3 openmp_flags) + list(GET information 4 has_tsan_flags) + + set(OPENMP_TEST_${lang}_COMPILER_PATH ${path}) + set(OPENMP_TEST_${lang}_COMPILER_ID ${id}) + set(OPENMP_TEST_${lang}_COMPILER_VERSION ${version}) + set(OPENMP_TEST_${lang}_COMPILER_OPENMP_FLAGS ${openmp_flags}) + set(OPENMP_TEST_${lang}_COMPILER_HAS_TSAN_FLAGS ${has_tsan_flags}) +endmacro() + +# Function to set variables with information about the test compiler. 
+function(set_test_compiler_information dir) # Reads the C and CXX records from ${dir}; both must report the same compiler ID and version. + extract_test_compiler_information(C ${dir}/CCompilerInformation.txt) + extract_test_compiler_information(CXX ${dir}/CXXCompilerInformation.txt) + if (NOT("${OPENMP_TEST_C_COMPILER_ID}" STREQUAL "${OPENMP_TEST_CXX_COMPILER_ID}" AND + "${OPENMP_TEST_C_COMPILER_VERSION}" STREQUAL "${OPENMP_TEST_CXX_COMPILER_VERSION}")) + message(STATUS "Test compilers for C and C++ don't match.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_BOLT_TARGETS FALSE PARENT_SCOPE) + else() + set(OPENMP_TEST_COMPILER_ID "${OPENMP_TEST_C_COMPILER_ID}" PARENT_SCOPE) + set(OPENMP_TEST_COMPILER_VERSION "${OPENMP_TEST_C_COMPILER_VERSION}" PARENT_SCOPE) + set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "${OPENMP_TEST_C_COMPILER_OPENMP_FLAGS}" PARENT_SCOPE) + set(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS "${OPENMP_TEST_C_COMPILER_HAS_TSAN_FLAGS}" PARENT_SCOPE) + + # Determine major and major.minor versions. + string(REGEX MATCH "[0-9]+" major "${OPENMP_TEST_C_COMPILER_VERSION}") + string(REGEX MATCH "[0-9]+\\.[0-9]+" majorminor "${OPENMP_TEST_C_COMPILER_VERSION}") + set(OPENMP_TEST_COMPILER_VERSION_MAJOR "${major}" PARENT_SCOPE) + set(OPENMP_TEST_COMPILER_VERSION_MAJOR_MINOR "${majorminor}" PARENT_SCOPE) + endif() +endfunction() + +if (${OPENMP_STANDALONE_BUILD}) + # Detect compiler that should be used for testing. + # We cannot use ExternalProject_Add() because its configuration runs when this + # project is built which is too late for detecting the compiler... 
+ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler) + execute_process( + COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_LIST_DIR}/DetectTestCompiler + -DCMAKE_C_COMPILER=${OPENMP_TEST_C_COMPILER} + -DCMAKE_CXX_COMPILER=${OPENMP_TEST_CXX_COMPILER} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler + OUTPUT_VARIABLE DETECT_COMPILER_OUT + ERROR_VARIABLE DETECT_COMPILER_ERR + RESULT_VARIABLE DETECT_COMPILER_RESULT) + if (DETECT_COMPILER_RESULT) # Non-zero result from the nested cmake run; its captured output is not printed here. + message(STATUS "Could not detect test compilers.") + message(WARNING "The check targets will not be available!") + set(ENABLE_CHECK_BOLT_TARGETS FALSE) + else() + set_test_compiler_information(${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler) + endif() +else() + # Set the information that we know. + set(OPENMP_TEST_COMPILER_ID "Clang") + # Cannot use CLANG_VERSION because we are not guaranteed that this is already set. + set(OPENMP_TEST_COMPILER_VERSION "${LLVM_VERSION}") + set(OPENMP_TEST_COMPILER_VERSION_MAJOR "${LLVM_VERSION_MAJOR}") + set(OPENMP_TEST_COMPILER_VERSION_MAJOR_MINOR "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}") + # Unfortunately the top-level cmake/config-ix.cmake file mangles CMake's + # CMAKE_THREAD_LIBS_INIT variable from the FindThreads package, so work + # around that, until it is fixed there. + if("${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-lpthread") + set(OPENMP_TEST_COMPILER_THREAD_FLAGS "-pthread") + else() + set(OPENMP_TEST_COMPILER_THREAD_FLAGS "${CMAKE_THREAD_LIBS_INIT}") + endif() + if(TARGET tsan) + set(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS 1) + else() + set(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS 0) + endif() + # TODO: Implement blockaddress in GlobalISel and remove this flag! + set(OPENMP_TEST_COMPILER_OPENMP_FLAGS "-fopenmp ${OPENMP_TEST_COMPILER_THREAD_FLAGS} -fno-experimental-isel") +endif() + +# Function to set compiler features for use in lit. 
+# Sets OPENMP_TEST_COMPILER_FEATURES in the caller's scope to a Python-style
+# list literal with four entries: '<comp>', '<comp>-<major>',
+# '<comp>-<major.minor>' and '<comp>-<full version>'.
+# <comp> is "gcc" for GNU, "icc" for Intel, otherwise the lowercased ID.
+function(set_test_compiler_features)
+  if ("${OPENMP_TEST_COMPILER_ID}" STREQUAL "GNU")
+    set(comp "gcc")
+  elseif ("${OPENMP_TEST_COMPILER_ID}" STREQUAL "Intel")
+    set(comp "icc")
+  else()
+    # Just use the lowercase of the compiler ID as fallback.
+    string(TOLOWER "${OPENMP_TEST_COMPILER_ID}" comp)
+  endif()
+  set(OPENMP_TEST_COMPILER_FEATURES "['${comp}', '${comp}-${OPENMP_TEST_COMPILER_VERSION_MAJOR}', '${comp}-${OPENMP_TEST_COMPILER_VERSION_MAJOR_MINOR}', '${comp}-${OPENMP_TEST_COMPILER_VERSION}']" PARENT_SCOPE)
+endfunction()
+set_test_compiler_features()
+
+# Function to add a testsuite for an OpenMP runtime library.
+#
+#   target   - name of the custom target to create
+#   comment  - text shown when the target runs
+#   ARGN     - EXCLUDE_FROM_CHECK_ALL (option), DEPENDS <targets...>,
+#              ARGS <lit arguments...>; all remaining (unparsed) arguments are
+#              passed on as the testsuites to run.
+#
+# When ENABLE_CHECK_BOLT_TARGETS is false, the target only echoes a message.
+function(add_openmp_testsuite target comment)
+  if (NOT ENABLE_CHECK_BOLT_TARGETS)
+    add_custom_target(${target}
+      COMMAND ${CMAKE_COMMAND} -E echo "${target} does nothing, dependencies not found.")
+    message(STATUS "${target} does nothing.")
+    return()
+  endif()
+
+  cmake_parse_arguments(ARG "EXCLUDE_FROM_CHECK_ALL" "" "DEPENDS;ARGS" ${ARGN})
+  # EXCLUDE_FROM_CHECK_ALL excludes the test ${target} from the aggregate
+  # check-bolt-openmp target (see construct_check_openmp_target() below).
+  if (NOT ARG_EXCLUDE_FROM_CHECK_ALL)
+    # Register the testsuites and depends for the check-bolt-openmp rule.
+    set_property(GLOBAL APPEND PROPERTY OPENMP_LIT_TESTSUITES ${ARG_UNPARSED_ARGUMENTS})
+    set_property(GLOBAL APPEND PROPERTY OPENMP_LIT_DEPENDS ${ARG_DEPENDS})
+  endif()
+
+  if (${OPENMP_STANDALONE_BUILD})
+    # Standalone: invoke lit directly through the Python interpreter.
+    set(LIT_ARGS ${OPENMP_LIT_ARGS} ${ARG_ARGS})
+    add_custom_target(${target}
+      COMMAND ${PYTHON_EXECUTABLE} ${OPENMP_LLVM_LIT_EXECUTABLE} ${LIT_ARGS} ${ARG_UNPARSED_ARGUMENTS}
+      COMMENT ${comment}
+      DEPENDS ${ARG_DEPENDS}
+      ${cmake_3_2_USES_TERMINAL}
+    )
+  else()
+    # In-tree: delegate to add_lit_testsuite() and additionally depend on
+    # clang, clang-resource-headers and FileCheck.  The two calls differ only
+    # in whether EXCLUDE_FROM_CHECK_ALL is forwarded.
+    if (ARG_EXCLUDE_FROM_CHECK_ALL)
+      add_lit_testsuite(${target}
+        ${comment}
+        ${ARG_UNPARSED_ARGUMENTS}
+        EXCLUDE_FROM_CHECK_ALL
+        DEPENDS clang clang-resource-headers FileCheck ${ARG_DEPENDS}
+        ARGS ${ARG_ARGS}
+      )
+    else()
+      add_lit_testsuite(${target}
+        ${comment}
+        ${ARG_UNPARSED_ARGUMENTS}
+        DEPENDS clang clang-resource-headers FileCheck ${ARG_DEPENDS}
+        ARGS ${ARG_ARGS}
+      )
+    endif()
+  endif()
+endfunction()
+
+# Creates the aggregate target check-bolt-openmp, which runs every testsuite
+# and depends on every target registered by add_openmp_testsuite() calls that
+# did not pass EXCLUDE_FROM_CHECK_ALL.
+function(construct_check_openmp_target)
+  get_property(OPENMP_LIT_TESTSUITES GLOBAL PROPERTY OPENMP_LIT_TESTSUITES)
+  get_property(OPENMP_LIT_DEPENDS GLOBAL PROPERTY OPENMP_LIT_DEPENDS)
+
+  # We already added the testsuites themselves, no need to do that again.
+  add_openmp_testsuite(check-bolt-openmp "Running BOLT tests" ${OPENMP_LIT_TESTSUITES} EXCLUDE_FROM_CHECK_ALL DEPENDS ${OPENMP_LIT_DEPENDS})
+endfunction()
diff --git a/bolt/cmake/config-ix.cmake b/bolt/cmake/config-ix.cmake
new file mode 100644
index 0000000000000..d9ea3bbb05749
--- /dev/null
+++ b/bolt/cmake/config-ix.cmake
@@ -0,0 +1,18 @@
+include(CheckCXXCompilerFlag)
+
+check_cxx_compiler_flag(-Wall OPENMP_HAVE_WALL_FLAG)
+check_cxx_compiler_flag(-Werror OPENMP_HAVE_WERROR_FLAG)
+
+# Additional warnings that are not enabled by -Wall.
+check_cxx_compiler_flag(-Wcast-qual OPENMP_HAVE_WCAST_QUAL_FLAG) +check_cxx_compiler_flag(-Wformat-pedantic OPENMP_HAVE_WFORMAT_PEDANTIC_FLAG) +check_cxx_compiler_flag(-Wimplicit-fallthrough OPENMP_HAVE_WIMPLICIT_FALLTHROUGH_FLAG) +check_cxx_compiler_flag(-Wsign-compare OPENMP_HAVE_WSIGN_COMPARE_FLAG) + +# Warnings that we want to disable because they are too verbose or fragile. +check_cxx_compiler_flag(-Wno-extra OPENMP_HAVE_WNO_EXTRA_FLAG) +check_cxx_compiler_flag(-Wno-pedantic OPENMP_HAVE_WNO_PEDANTIC_FLAG) +check_cxx_compiler_flag(-Wno-maybe-uninitialized OPENMP_HAVE_WNO_MAYBE_UNINITIALIZED_FLAG) + +check_cxx_compiler_flag(-std=gnu++14 OPENMP_HAVE_STD_GNUPP14_FLAG) +check_cxx_compiler_flag(-std=c++14 OPENMP_HAVE_STD_CPP14_FLAG) diff --git a/bolt/docs/ReleaseNotes.rst b/bolt/docs/ReleaseNotes.rst new file mode 100644 index 0000000000000..b7f2ec42277e3 --- /dev/null +++ b/bolt/docs/ReleaseNotes.rst @@ -0,0 +1,45 @@ +=========================== +openmp 11.0.0 Release Notes +=========================== + +.. contents:: + :local: + +.. warning:: + These are in-progress notes for the upcoming LLVM 11.0.0 release. + Release notes for previous releases can be found on + `the Download Page <https://releases.llvm.org/download.html>`_. + +Introduction +============ + +This document contains the release notes for the OpenMP runtime, release 11.0.0. +Here we describe the status of openmp, including major improvements +from the previous release. All openmp releases may be downloaded +from the `LLVM releases web site <https://llvm.org/releases/>`_. + +Non-comprehensive list of changes in this release +================================================= + +5.0 features +------------ + +* ... + +5.1 features +------------ + +* ... + +OMPT Improvements +----------------- + +* Added OMPT callbacks for doacross loops, detached tasks +* Added handling for mutexinoutset dependencies + +OMPT-based Tools +---------------- + +* Added ompt-multiplex.h as a header-only OMPT-tool to support nesting of OMPT + tools.
(see openmp/tools/multiplex) + diff --git a/bolt/examples/argobots/.gitignore b/bolt/examples/argobots/.gitignore new file mode 100644 index 0000000000000..d21c8ccd629e4 --- /dev/null +++ b/bolt/examples/argobots/.gitignore @@ -0,0 +1,25 @@ +parallel_for_omp +parallel_for_abt_task +parallel_for_abt_thread +nested_parallel_for_omp +nested_parallel_for_abt_task +nested_parallel_for_abt_thread +nested_parallel_for_block_abt_task +nested_parallel_for_block_abt_thread +nested_parallel_for_block_omp +nested_parallel_for_irregular_omp +nested_parallel_for_irregular_abt_task +nested_parallel_for_irregular_abt_thread +task_single_producer_omp +task_single_producer_abt_task +task_single_producer_abt_thread +task_multiple_producer_omp +task_multiple_producer_abt_task +task_multiple_producer_abt_thread +task_nested_omp +task_nested_abt_task +task_nested_lvl2_omp +task_nested_lvl2_abt_task +taskwait_omp +taskyield_omp + diff --git a/bolt/examples/argobots/Makefile b/bolt/examples/argobots/Makefile new file mode 100644 index 0000000000000..b8faccd103877 --- /dev/null +++ b/bolt/examples/argobots/Makefile @@ -0,0 +1,46 @@ +# -*- Mode: Makefile; -*- +# +# See LICENSE.txt in top-level directory. 
+#
+
+# Builds the Argobots variants of the examples with make's built-in
+# "program from a single .c file" rule.
+CC=gcc
+CFLAGS=-g -Wall -O2 `pkg-config --cflags argobots`
+# Libraries go into LDLIBS, not LDFLAGS: the built-in link rule expands to
+#   $(CC) $(CFLAGS) $(LDFLAGS) prog.c $(LDLIBS) -o prog
+# so only LDLIBS ends up after the source file, which is where linkers that
+# resolve symbols left to right (e.g. with --as-needed) require libraries.
+LDLIBS=`pkg-config --libs argobots` -lm
+
+PROGS = \
+	parallel_for_abt_task \
+	parallel_for_abt_thread \
+	nested_parallel_for_abt_task \
+	nested_parallel_for_abt_thread \
+	nested_parallel_for_block_abt_thread \
+	nested_parallel_for_block_abt_task \
+	nested_parallel_for_irregular_abt_task \
+	nested_parallel_for_irregular_abt_thread \
+	task_single_producer_abt_task \
+	task_single_producer_abt_thread \
+	task_multiple_producer_abt_task \
+	task_multiple_producer_abt_thread \
+	task_nested_abt_task \
+	task_nested_lvl2_abt_task
+
+# None of these targets produce a file of the same name.
+.PHONY: all clean testing
+
+all: $(PROGS)
+
+clean:
+	rm -rf *.o *.dSYM $(PROGS)
+
+# Run every example in PROGS order (building them first if necessary) and
+# stop at the first one that fails.
+testing: all
+	@for prog in $(PROGS); do \
+		echo "./$$prog"; \
+		./$$prog || exit 1; \
+	done
+
diff --git a/bolt/examples/argobots/Makefile.omp b/bolt/examples/argobots/Makefile.omp
new file mode 100644
index 0000000000000..02db14d53901e
--- /dev/null
+++ b/bolt/examples/argobots/Makefile.omp
@@ -0,0 +1,38 @@
+# -*- Mode: Makefile; -*-
+#
+# See LICENSE.txt in top-level directory.
+#
+
+# Builds the OpenMP variants of the examples with make's built-in
+# "program from a single .c file" rule.
+CC=gcc
+CFLAGS=-g -Wall -O2 -fopenmp
+# -fopenmp is a driver flag and stays in LDFLAGS; -lm is a library and goes
+# into LDLIBS so that it is placed after the source file on the link line.
+LDFLAGS=-fopenmp
+LDLIBS=-lm
+
+PROGS = \
+	parallel_for_omp \
+	nested_parallel_for_omp \
+	nested_parallel_for_block_omp \
+	nested_parallel_for_irregular_omp \
+	task_single_producer_omp \
+	task_multiple_producer_omp \
+	task_nested_omp \
+	task_nested_lvl2_omp \
+	taskwait_omp \
+	taskyield_omp
+
+# None of these targets produce a file of the same name.
+.PHONY: all clean testing
+
+all: $(PROGS)
+
+clean:
+	rm -rf *.o *.dSYM $(PROGS)
+
+# Run every example in PROGS order (building them first if necessary) and
+# stop at the first one that fails.
+testing: all
+	@for prog in $(PROGS); do \
+		echo "./$$prog"; \
+		./$$prog || exit 1; \
+	done
+
diff --git a/bolt/examples/argobots/README b/bolt/examples/argobots/README
new file mode 100644
index 0000000000000..3e899c85dd1f6
--- /dev/null
+++ b/bolt/examples/argobots/README
@@ -0,0 +1,87 @@
+                          OpenMP and Argobots
+
+This directory includes OpenMP code examples and their corresponding sample
+implementations with Argobots APIs. These are just examples in order to show
+how Argobots APIs can be used to mimic OpenMP behaviors. Please note that they
+are not intended to provide optimized implementations.
+
+NOTE:
+Examples in this directory are originated from those in examples of Argobots.
+
+1. Build and Testing
+2. Examples
+
+
+-------------------------------------------------------------------------------
+
+1. Build and Testing
+====================
+
+Once configure is done, there should be two Makefiles in this directory -
+Makefile for Argobots examples and Makefile.omp for OpenMP examples.
+
+(a) To build and test Argobots examples:
+
+    build: make
+    test : make testing
+
+
+(b) To build and test OpenMP examples:
+
+    build: make -f Makefile.omp
+    test : make -f Makefile.omp testing
+
+-------------------------------------------------------------------------------
+
+2.
Examples +=========== + +parallel_for_{omp,abt_task,abt_thread}: + -- Implementation sample using a pragma omp parallel for directive + -- An Argobots code implementation is included in order to study the + performance + +nested_parallel_for_{omp,abt_task,abt_thread}: + -- Implementation sample using nested parallel constructs + -- An Argobots code implementation is included in order to study the + performance + +nested_parallel_for_irregular_{omp,abt_task,abt_thread}: + -- Implementation sample using nested parallel constructs when each iteration + has a different workload + -- An Argobots code implementation is included in order to study the + performance + +task_single_producer_{omp,abt_task,abt_thread}: + -- Implementation sample where a single thread creates all the work (tasks) and + the other threads execute the generated work. + -- An Argobots code implementation is also included to study the performance. + -- Main motivation for the Intel OpenMP Runtime modification + +task_multiple_producer_{omp,abt_task,abt_thread}: + -- Implementation sample where every thread creates work in its own queue + or pool and then executes the generated work. + -- An Argobots code implementation is also included to study the performance. + -- Mainly used in order to understand the cutoff mechanism implemented by + the GCC and Intel OpenMP libraries + +task_nested_{omp,abt_task}: + -- Implementation sample used mainly to observe the behavior when a nested task (lvl 1) + is created. + -- An Argobots code implementation is included in order to study the performance + +task_nested_lvl2_{omp,abt_task}: + -- Implementation sample used mainly to observe the behavior when a nested task (lvl 2) + is created.
+ -- An Argobots code implementation is included in order to study the performance + +taskwait_omp: + -- Implementation sample used to observe the behavior when a taskwait directive + is used or not + -- Mainly used to get the generated code for each compiler + +taskyield_omp: + -- Implementation sample used to observe the behavior when a taskyield directive + is used or not + -- Mainly used to get the generated code for each compiler + diff --git a/bolt/examples/argobots/nested_parallel_for_abt_task.c b/bolt/examples/argobots/nested_parallel_for_abt_task.c new file mode 100644 index 0000000000000..ac2fc61ea8452 --- /dev/null +++ b/bolt/examples/argobots/nested_parallel_for_abt_task.c @@ -0,0 +1,224 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* This code mimics the parallel for OpenMP directive in nested loops. + * It creates as many streams as the user requires and threads are created and + * assigned by static blocks to each stream for the outer loop. + * For the inner loop, as many tasks as the user requires are created.
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <abt.h>
+
+#define NUM_ELEMS 5017600 /* default element count: 2240 x 2240 floats */
+#define NUM_XSTREAMS 4
+#define NUM_REPS 1
+
+ABT_pool *g_pools;
+
+/* Work item of one inner-loop tasklet: scale columns [start, end) of `row`. */
+typedef struct {
+    float *ptr;   /* base address of the it x it matrix */
+    float value;  /* scale factor */
+    int row;      /* matrix row this tasklet works on */
+    int it;       /* row length */
+    int start;    /* first column (inclusive) */
+    int end;      /* last column (exclusive) */
+} vector_scal_args_t;
+
+/* Work item of one outer-loop ULT: process rows [start, end). */
+typedef struct {
+    float *ptr;     /* base address of the it x it matrix */
+    float value;    /* scale factor */
+    int nxstreams;  /* number of tasklets created per row */
+    int it;         /* row length / number of rows */
+    int start;      /* first row (inclusive) */
+    int end;        /* last row (exclusive) */
+} vector_scal_task_args_t;
+
+/* malloc() that terminates the program instead of returning NULL. */
+static void *xmalloc(size_t size)
+{
+    void *p = malloc(size);
+    if (p == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
+/* Parse `text` as a strictly positive int; print an error and exit if it is
+ * not one.  Zero and negative values are rejected because they are later used
+ * as divisors and allocation sizes. */
+static int parse_positive_int(const char *text, const char *what)
+{
+    char *end;
+    long v;
+
+    errno = 0;
+    v = strtol(text, &end, 10);
+    if (end == text || *end != '\0' || errno == ERANGE || v < 1 ||
+        v > INT_MAX) {
+        fprintf(stderr, "invalid %s '%s': expected a positive integer\n",
+                what, text);
+        exit(EXIT_FAILURE);
+    }
+    return (int)v;
+}
+
+/* Microseconds elapsed between two gettimeofday() samples. */
+static double elapsed_usec(const struct timeval *from,
+                           const struct timeval *to)
+{
+    return (to->tv_sec - from->tv_sec) * 1000000.0 +
+           (to->tv_usec - from->tv_usec);
+}
+
+/* Inner-loop body: multiply columns [start, end) of one row by `value`. */
+void vector_scal(void *arguments)
+{
+    const vector_scal_args_t *arg = (const vector_scal_args_t *)arguments;
+    float *row = arg->ptr + (size_t)arg->row * (size_t)arg->it;
+    int i;
+
+    for (i = arg->start; i < arg->end; i++) {
+        row[i] *= arg->value;
+    }
+}
+
+/* Outer-loop body, run by one ULT per execution stream.  Every row in
+ * [arg->start, arg->end) is split into arg->nxstreams static column blocks;
+ * one tasklet per block is pushed to the pool indexed by the rank of the
+ * calling execution stream, and all tasklets of a row are joined before the
+ * next row starts. */
+void vector_scal_launch(void *arguments)
+{
+    const vector_scal_task_args_t *arg =
+        (const vector_scal_task_args_t *)arguments;
+    const int it = arg->it;
+    const int num_ults = arg->nxstreams;
+    const int bloc = it / num_ults;
+    const int rest = it % num_ults;
+    int i, j, rank;
+    vector_scal_args_t *args;
+    ABT_task *tasks;
+
+    args = (vector_scal_args_t *)xmalloc(sizeof(*args) * (size_t)num_ults);
+    tasks = (ABT_task *)xmalloc(sizeof(*tasks) * (size_t)num_ults);
+
+    /* NOTE(review): assumes the rank of execution stream j equals its index
+     * j in g_pools -- confirm for streams created by main(). */
+    ABT_xstream_self_rank(&rank);
+    for (i = arg->start; i < arg->end; i++) {
+        /* The column partition restarts at 0 for every row. */
+        int end = 0;
+        for (j = 0; j < num_ults; j++) {
+            args[j].start = end;
+            end += bloc + ((j < rest) ? 1 : 0);
+            args[j].end = end;
+            args[j].value = arg->value;
+            args[j].ptr = arg->ptr;
+            args[j].it = it;
+            args[j].row = i;
+
+            ABT_task_create(g_pools[rank], vector_scal,
+                            (void *)&args[j], &tasks[j]);
+        }
+        /* Freeing a tasklet waits for it to finish, so args[] can be reused
+         * for the next row. */
+        for (j = 0; j < num_ults; j++) {
+            ABT_task_free(&tasks[j]);
+        }
+    }
+
+    free(tasks);
+    free(args);
+}
+
+
+/* Usage: prog [num_xstreams [num_elems [inner_tasks]]]
+ * Scales an it x it float matrix (it = ceil(sqrt(num_elems))) by 0.9 and
+ * prints "<xstreams> <inner> <elems> <total seconds> <join seconds>".
+ * Returns EXIT_FAILURE if the result does not verify. */
+int main(int argc, char *argv[])
+{
+    int i, j;
+    int ntasks, it;
+    int num_xstreams, inner_xstreams;
+    int bloc, rest, end;
+    int rc = EXIT_SUCCESS;
+    ABT_xstream *xstreams;
+    ABT_thread *launchers;
+    vector_scal_task_args_t *args;
+    struct timeval t_start, t_start2, t_end;
+    double time_total, time_join;
+    float *a;
+
+    num_xstreams = (argc > 1)
+        ? parse_positive_int(argv[1], "number of execution streams")
+        : NUM_XSTREAMS;
+    ntasks = (argc > 2)
+        ? parse_positive_int(argv[2], "number of elements")
+        : NUM_ELEMS;
+    inner_xstreams = (argc > 3)
+        ? parse_positive_int(argv[3], "number of inner tasks")
+        : NUM_XSTREAMS;
+
+    /* Round the element count up to the next square number. */
+    it = (int)ceil(sqrt((double)ntasks));
+    if (it > INT_MAX / it) {
+        fprintf(stderr, "number of elements is too large\n");
+        return EXIT_FAILURE;
+    }
+    ntasks = it * it;
+
+    g_pools = (ABT_pool *)xmalloc(sizeof(ABT_pool) * (size_t)num_xstreams);
+    xstreams =
+        (ABT_xstream *)xmalloc(sizeof(ABT_xstream) * (size_t)num_xstreams);
+    launchers =
+        (ABT_thread *)xmalloc(sizeof(ABT_thread) * (size_t)num_xstreams);
+    args = (vector_scal_task_args_t *)
+        xmalloc(sizeof(vector_scal_task_args_t) * (size_t)num_xstreams);
+
+    a = (float *)xmalloc(sizeof(float) * (size_t)ntasks);
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i * 1.0f;
+    }
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    for (i = 0; i < num_xstreams; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &g_pools[i]);
+    }
+
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &g_pools[0]);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+    }
+
+    gettimeofday(&t_start, NULL);
+
+    /* Each launcher ULT is created on the xstream which is going to execute
+     * it; rows are distributed by static blocks. */
+    bloc = it / num_xstreams;
+    rest = it % num_xstreams;
+    end = 0;
+
+    for (j = 0; j < num_xstreams; j++) {
+        args[j].start = end;
+        end += bloc + ((j < rest) ? 1 : 0);
+        args[j].end = end;
+        args[j].value = 0.9f;
+        args[j].ptr = a;
+        args[j].it = it;
+        args[j].nxstreams = inner_xstreams;
+
+        ABT_thread_create_on_xstream(xstreams[j], vector_scal_launch,
+                                     (void *)&args[j], ABT_THREAD_ATTR_NULL,
+                                     &launchers[j]);
+    }
+
+    /* Join every launcher, including the one on the primary execution
+     * stream.  Freeing a ULT waits for it to terminate, so all rows have
+     * been processed once this loop is done. */
+    gettimeofday(&t_start2, NULL);
+    for (j = 0; j < num_xstreams; j++) {
+        ABT_thread_free(&launchers[j]);
+    }
+
+    gettimeofday(&t_end, NULL);
+    time_total = elapsed_usec(&t_start, &t_end);
+    time_join = elapsed_usec(&t_start2, &t_end);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    printf("%d %d %d %f %f\n",
+           num_xstreams, inner_xstreams, ntasks, time_total / 1000000.0,
+           time_join / 1000000.0);
+
+    ABT_finalize();
+
+    /* Verification: report the first element that was not scaled once. */
+    for (i = 0; i < ntasks; i++) {
+        if (a[i] != i * 0.9f) {
+            printf("%f\n", a[i]);
+            rc = EXIT_FAILURE;
+            break;
+        }
+    }
+
+    free(a);
+    free(args);
+    free(launchers);
+    free(xstreams);
+    free(g_pools);
+
+    return rc;
+}
diff --git a/bolt/examples/argobots/nested_parallel_for_abt_thread.c b/bolt/examples/argobots/nested_parallel_for_abt_thread.c
new file mode 100644
index 0000000000000..f6994ec163a9e
--- /dev/null
+++ b/bolt/examples/argobots/nested_parallel_for_abt_thread.c
@@ -0,0 +1,228 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/* This code mimics the parallel for OpenMP directive in nested loops.
+ * It creates as many streams as user requires and threads are created and
+ * assigned by static blocs to each stream for the outer loop.
+ * For the inner loop, as many threads as the user requires are created.
+ */
+
+
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <abt.h>
+
+#define NUM_ELEMS 5017600 /* default element count: 2240 x 2240 floats */
+#define NUM_XSTREAMS 4
+#define NUM_REPS 1
+
+ABT_pool *g_pools;
+
+/* Work item of one inner-loop ULT: scale columns [start, end) of `row`. */
+typedef struct {
+    float *ptr;   /* base address of the it x it matrix */
+    float value;  /* scale factor */
+    int row;      /* matrix row this ULT works on */
+    int it;       /* row length */
+    int start;    /* first column (inclusive) */
+    int end;      /* last column (exclusive) */
+} vector_scal_args_t;
+
+/* Work item of one outer-loop ULT: process rows [start, end). */
+typedef struct {
+    float *ptr;     /* base address of the it x it matrix */
+    float value;    /* scale factor */
+    int nxstreams;  /* number of inner ULTs created per row */
+    int it;         /* row length / number of rows */
+    int start;      /* first row (inclusive) */
+    int end;        /* last row (exclusive) */
+} vector_scal_task_args_t;
+
+/* malloc() that terminates the program instead of returning NULL. */
+static void *xmalloc(size_t size)
+{
+    void *p = malloc(size);
+    if (p == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
+/* Parse `text` as a strictly positive int; print an error and exit if it is
+ * not one.  Zero and negative values are rejected because they are later used
+ * as divisors and allocation sizes. */
+static int parse_positive_int(const char *text, const char *what)
+{
+    char *end;
+    long v;
+
+    errno = 0;
+    v = strtol(text, &end, 10);
+    if (end == text || *end != '\0' || errno == ERANGE || v < 1 ||
+        v > INT_MAX) {
+        fprintf(stderr, "invalid %s '%s': expected a positive integer\n",
+                what, text);
+        exit(EXIT_FAILURE);
+    }
+    return (int)v;
+}
+
+/* Microseconds elapsed between two gettimeofday() samples. */
+static double elapsed_usec(const struct timeval *from,
+                           const struct timeval *to)
+{
+    return (to->tv_sec - from->tv_sec) * 1000000.0 +
+           (to->tv_usec - from->tv_usec);
+}
+
+/* Inner-loop body: multiply columns [start, end) of one row by `value`. */
+void vector_scal(void *arguments)
+{
+    const vector_scal_args_t *arg = (const vector_scal_args_t *)arguments;
+    float *row = arg->ptr + (size_t)arg->row * (size_t)arg->it;
+    int i;
+
+    for (i = arg->start; i < arg->end; i++) {
+        row[i] *= arg->value;
+    }
+}
+
+/* Outer-loop body, run by one ULT per execution stream.  Every row in
+ * [arg->start, arg->end) is split into arg->nxstreams static column blocks;
+ * one ULT per block is pushed to the pool indexed by the rank of the calling
+ * execution stream, and all ULTs of a row are joined before the next row
+ * starts. */
+void vector_scal_launch(void *arguments)
+{
+    const vector_scal_task_args_t *arg =
+        (const vector_scal_task_args_t *)arguments;
+    const int it = arg->it;
+    const int num_ults = arg->nxstreams;
+    const int bloc = it / num_ults;
+    const int rest = it % num_ults;
+    int i, j, rank;
+    vector_scal_args_t *args;
+    ABT_thread *threads;
+
+    args = (vector_scal_args_t *)xmalloc(sizeof(*args) * (size_t)num_ults);
+    threads = (ABT_thread *)xmalloc(sizeof(*threads) * (size_t)num_ults);
+
+    /* NOTE(review): assumes the rank of execution stream j equals its index
+     * j in g_pools -- confirm for streams created by main(). */
+    ABT_xstream_self_rank(&rank);
+    for (i = arg->start; i < arg->end; i++) {
+        /* The column partition restarts at 0 for every row. */
+        int end = 0;
+        for (j = 0; j < num_ults; j++) {
+            args[j].start = end;
+            end += bloc + ((j < rest) ? 1 : 0);
+            args[j].end = end;
+            args[j].value = arg->value;
+            args[j].ptr = arg->ptr;
+            args[j].it = it;
+            args[j].row = i;
+
+            ABT_thread_create(g_pools[rank], vector_scal,
+                              (void *)&args[j], ABT_THREAD_ATTR_NULL,
+                              &threads[j]);
+        }
+        /* Freeing a ULT waits for it to finish, so args[] can be reused for
+         * the next row. */
+        for (j = 0; j < num_ults; j++) {
+            ABT_thread_free(&threads[j]);
+        }
+    }
+
+    free(threads);
+    free(args);
+}
+
+
+/* Usage: prog [num_xstreams [num_elems [inner_threads]]]
+ * Scales an it x it float matrix (it = ceil(sqrt(num_elems))) by 0.9 and
+ * prints "<xstreams> <inner> <elems> <total seconds> <join seconds>".
+ * Returns EXIT_FAILURE if the result does not verify. */
+int main(int argc, char *argv[])
+{
+    int i, j;
+    int ntasks, it;
+    int num_xstreams, inner_xstreams;
+    int bloc, rest, end;
+    int rc = EXIT_SUCCESS;
+    ABT_xstream *xstreams;
+    ABT_thread *launchers;
+    vector_scal_task_args_t *args;
+    struct timeval t_start, t_start2, t_end;
+    double time_total, time_join;
+    float *a;
+
+    num_xstreams = (argc > 1)
+        ? parse_positive_int(argv[1], "number of execution streams")
+        : NUM_XSTREAMS;
+    ntasks = (argc > 2)
+        ? parse_positive_int(argv[2], "number of elements")
+        : NUM_ELEMS;
+    inner_xstreams = (argc > 3)
+        ? parse_positive_int(argv[3], "number of inner threads")
+        : NUM_XSTREAMS;
+
+    /* Round the element count up to the next square number. */
+    it = (int)ceil(sqrt((double)ntasks));
+    if (it > INT_MAX / it) {
+        fprintf(stderr, "number of elements is too large\n");
+        return EXIT_FAILURE;
+    }
+    ntasks = it * it;
+
+    g_pools = (ABT_pool *)xmalloc(sizeof(ABT_pool) * (size_t)num_xstreams);
+    xstreams =
+        (ABT_xstream *)xmalloc(sizeof(ABT_xstream) * (size_t)num_xstreams);
+    launchers =
+        (ABT_thread *)xmalloc(sizeof(ABT_thread) * (size_t)num_xstreams);
+    args = (vector_scal_task_args_t *)
+        xmalloc(sizeof(vector_scal_task_args_t) * (size_t)num_xstreams);
+
+    a = (float *)xmalloc(sizeof(float) * (size_t)ntasks);
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i * 1.0f;
+    }
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    for (i = 0; i < num_xstreams; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &g_pools[i]);
+    }
+
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &g_pools[0]);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+
+    gettimeofday(&t_start, NULL);
+
+    /* Each launcher ULT is created on the xstream which is going to execute
+     * it; rows are distributed by static blocks. */
+    bloc = it / num_xstreams;
+    rest = it % num_xstreams;
+    end = 0;
+
+    for (j = 0; j < num_xstreams; j++) {
+        args[j].start = end;
+        end += bloc + ((j < rest) ? 1 : 0);
+        args[j].end = end;
+        args[j].value = 0.9f;
+        args[j].ptr = a;
+        args[j].it = it;
+        args[j].nxstreams = inner_xstreams;
+        ABT_thread_create_on_xstream(xstreams[j], vector_scal_launch,
+                                     (void *)&args[j], ABT_THREAD_ATTR_NULL,
+                                     &launchers[j]);
+    }
+
+    /* Join every launcher, including the one on the primary execution
+     * stream.  Freeing a ULT waits for it to terminate, so all rows have
+     * been processed once this loop is done. */
+    gettimeofday(&t_start2, NULL);
+    for (j = 0; j < num_xstreams; j++) {
+        ABT_thread_free(&launchers[j]);
+    }
+
+    gettimeofday(&t_end, NULL);
+    time_total = elapsed_usec(&t_start, &t_end);
+    time_join = elapsed_usec(&t_start2, &t_end);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    printf("%d %d %d %f %f\n",
+           num_xstreams, inner_xstreams, ntasks, time_total / 1000000.0,
+           time_join / 1000000.0);
+
+    ABT_finalize();
+
+    /* Verification: report the first element that was not scaled once. */
+    for (i = 0; i < ntasks; i++) {
+        if (a[i] != i * 0.9f) {
+            printf("%f\n", a[i]);
+            rc = EXIT_FAILURE;
+            break;
+        }
+    }
+
+    free(a);
+    free(args);
+    free(launchers);
+    free(xstreams);
+    free(g_pools);
+
+    return rc;
+}
diff --git a/bolt/examples/argobots/nested_parallel_for_block_abt_task.c b/bolt/examples/argobots/nested_parallel_for_block_abt_task.c
new file mode 100644
index 0000000000000..de762bb5d8adf
--- /dev/null
+++ b/bolt/examples/argobots/nested_parallel_for_block_abt_task.c
@@ -0,0 +1,239 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/* This code mimics the parallel for OpenMP directive in nested loops.
+ * It creates as many streams as user requires and threads are created and
+ * assigned by static blocs to each stream for the outer loop.
+ * For the inner loop, as many threads as the user requires are created.
+ */
+
+
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <abt.h>
+
+#define NUM_XSTREAMS 36
+#define NUM 1000
+#define NUM_REPS 1
+
+int in[NUM][NUM];
+int out[NUM][NUM];
+
+/* Vector initialization */
+void init(void)
+{
+    int i, j;
+    for (i = 0; i < NUM; i++) {
+        for (j = 0; j < NUM; j++) {
+            in[i][j] = 1;
+            out[i][j] = 0;
+        }
+    }
+}
+
+/* Artificial per-element workload: sums sqrt(cos(v) * sin(v)) 100 times and
+ * truncates the sum to int. */
+int comp(int v)
+{
+    int i;
+    double ret = 0.0;
+    for (i = 0; i < 100; i++) {
+        ret += sqrt(cos((double)v) * sin((double)v));
+    }
+    return (int)ret;
+}
+
+/* Compare out[][] against a sequential recomputation; prints the first
+ * mismatch, or a success message if there is none. */
+void check(void)
+{
+    int i, j;
+    for (i = 0; i < NUM; i++) {
+        for (j = 0; j < NUM; j++) {
+            int expected = comp(in[i][j]);
+            if (out[i][j] != expected) {
+                printf("out[%d][%d]=%d expected=%d\n", i, j, out[i][j], expected);
+                return;
+            }
+        }
+    }
+    printf("Verification: SUCCESS\n");
+}
+
+
+static ABT_pool *g_pools;
+
+/* Work item of one inner-loop tasklet: columns [start, end) of row x. */
+typedef struct {
+    int start;
+    int end;
+    int x;
+} vector_scal_args_t;
+
+/* Work item of one outer-loop ULT: rows [start, end), each split into
+ * nxstreams column blocks of a row of length it. */
+typedef struct {
+    int nxstreams;
+    int it;
+    int start;
+    int end;
+} vector_scal_task_args_t;
+
+/* malloc() that terminates the program instead of returning NULL. */
+static void *xmalloc(size_t size)
+{
+    void *p = malloc(size);
+    if (p == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
+/* Parse `text` as a strictly positive int; print an error and exit if it is
+ * not one.  Zero and negative values are rejected because they are later used
+ * as divisors and allocation sizes. */
+static int parse_positive_int(const char *text, const char *what)
+{
+    char *end;
+    long v;
+
+    errno = 0;
+    v = strtol(text, &end, 10);
+    if (end == text || *end != '\0' || errno == ERANGE || v < 1 ||
+        v > INT_MAX) {
+        fprintf(stderr, "invalid %s '%s': expected a positive integer\n",
+                what, text);
+        exit(EXIT_FAILURE);
+    }
+    return (int)v;
+}
+
+/* Inner-loop body: out[x][j] = comp(in[x][j]) for j in [start, end). */
+void vector_scal(void *arguments)
+{
+    int j;
+    vector_scal_args_t *arg;
+    arg = (vector_scal_args_t *)arguments;
+
+    int mystart = arg->start;
+    int myend = arg->end;
+    int x = arg->x;
+
+    for (j = mystart; j < myend; j++) {
+        out[x][j] = comp(in[x][j]);
+    }
+}
+
+/* Outer-loop body.  For every row in [arg->start, arg->end) the row is split
+ * into arg->nxstreams static column blocks.  Block 0 is computed by the
+ * caller itself; the other blocks run as tasklets in the pool indexed by the
+ * rank of the calling execution stream and are joined before the next row. */
+void vector_scal_launch(void *arguments)
+{
+    int i, it, j, num_ults, rank, mystart, myend, p;
+    ABT_task *tasks;
+    vector_scal_task_args_t *arg;
+    arg = (vector_scal_task_args_t *) arguments;
+    vector_scal_args_t *args;
+    it = arg->it;
+    num_ults = arg->nxstreams;
+    mystart = arg->start;
+    myend = arg->end;
+
+    args = (vector_scal_args_t *)xmalloc(sizeof(vector_scal_args_t)
+                                         * (size_t)num_ults);
+
+    tasks = (ABT_task *)xmalloc(sizeof(ABT_task) * (size_t)num_ults);
+
+    int bloc = it / (num_ults);
+    int rest = it % (num_ults);
+    ABT_xstream_self_rank(&rank);
+    for (i = mystart; i < myend; i++) {
+        int start = 0;
+        int end = 0;
+        for (j = 0; j < num_ults; j++) {
+            start = end;
+            int inc = (j < rest) ? 1 : 0;
+            end += bloc + inc;
+            args[j].start = start;
+            args[j].end = end;
+            args[j].x = i;
+
+            if (j > 0) {
+                ABT_task_create(g_pools[rank], vector_scal, (void *)&args[j], &tasks[j]);
+            }
+        }
+        vector_scal((void *)&args[0]);
+        for (p = 1; p < num_ults; p++) {
+            ABT_task_free(&tasks[p]);
+        }
+    }
+
+    free(tasks);
+    free(args);
+}
+
+
+/* Usage: prog [num_xstreams [inner_tasks [repetitions]]]
+ * Prints "<xstreams> <inner> <average seconds per repetition>" followed by
+ * the verification result. */
+int main(int argc, char *argv[])
+{
+    int i, j, r;
+    int num_xstreams;
+    ABT_xstream *xstreams;
+    ABT_thread *threads;
+    vector_scal_task_args_t *args;
+    int inner_xstreams;
+    double *times, avg_time = 0.0;
+
+    num_xstreams = (argc > 1)
+        ? parse_positive_int(argv[1], "number of execution streams")
+        : NUM_XSTREAMS;
+    inner_xstreams = (argc > 2)
+        ? parse_positive_int(argv[2], "number of inner tasks")
+        : NUM_XSTREAMS;
+    int rep = (argc > 3)
+        ? parse_positive_int(argv[3], "number of repetitions")
+        : NUM_REPS;
+    times = (double *)xmalloc(sizeof(double) * (size_t)rep);
+
+    init();
+
+    g_pools = (ABT_pool *)xmalloc(sizeof(ABT_pool) * (size_t)num_xstreams);
+    xstreams = (ABT_xstream *)xmalloc(sizeof(ABT_xstream) * (size_t)num_xstreams);
+    threads = (ABT_thread *)xmalloc(sizeof(ABT_thread) * (size_t)num_xstreams);
+    args = (vector_scal_task_args_t *)xmalloc(sizeof(vector_scal_task_args_t)
+                                              * (size_t)num_xstreams);
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    for (i = 0; i < num_xstreams; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &g_pools[i]);
+    }
+
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &g_pools[0]);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+
+    /* Each task is created on the xstream which is going to execute it */
+
+    for (r = 0; r < rep; r++) {
+        times[r] = ABT_get_wtime();
+
+        int bloc = NUM / (num_xstreams);
+        int rest = NUM % (num_xstreams);
+        int start = 0;
+        int end = 0;
+
+        for (j = 0; j < num_xstreams; j++) {
+            start = end;
+            int inc = (j < rest) ? 1 : 0;
+            end += bloc + inc;
+            args[j].start = start;
+            args[j].end = end;
+            args[j].it = NUM;
+            args[j].nxstreams = inner_xstreams;
+            if (j > 0) {
+                ABT_thread_create(g_pools[j], vector_scal_launch,
+                                  (void *)&args[j], ABT_THREAD_ATTR_NULL,
+                                  &threads[j]);
+            }
+        }
+        /* The primary ULT processes the first block of rows itself. */
+        vector_scal_launch((void *)&args[0]);
+
+        for (j = 1; j < num_xstreams; j++) {
+            ABT_thread_free(&threads[j]);
+        }
+
+        times[r] = ABT_get_wtime() - times[r];
+        avg_time += times[r];
+    }
+    avg_time /= rep;
+    printf("%d %d %f\n", num_xstreams, inner_xstreams, avg_time);
+    check();
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    ABT_finalize();
+
+    free(g_pools);
+    free(xstreams);
+    free(threads);
+    free(args);
+    free(times);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/nested_parallel_for_block_abt_thread.c b/bolt/examples/argobots/nested_parallel_for_block_abt_thread.c
new file mode 100644
index 0000000000000..9b3fe45f44f59
--- /dev/null
+++ b/bolt/examples/argobots/nested_parallel_for_block_abt_thread.c
@@ -0,0 +1,241 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/* This code mimics the parallel for OpenMP directive in nested loops.
+ * It creates as many streams as user requires and threads are created and
+ * assigned by static blocs to each stream for the outer loop.
+ * For the inner loop, as many threads as the user requires are created.
+ */
+
+
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <abt.h>
+
+#define NUM_XSTREAMS 36
+#define NUM 1000
+#define NUM_REPS 1
+
+int in[NUM][NUM];
+int out[NUM][NUM];
+
+/* Vector initialization */
+void init(void)
+{
+    int i, j;
+    for (i = 0; i < NUM; i++) {
+        for (j = 0; j < NUM; j++) {
+            in[i][j] = 1;
+            out[i][j] = 0;
+        }
+    }
+}
+
+/* Artificial per-element workload: sums sqrt(cos(v) * sin(v)) 100 times and
+ * truncates the sum to int. */
+int comp(int v)
+{
+    int i;
+    double ret = 0.0;
+    for (i = 0; i < 100; i++) {
+        ret += sqrt(cos((double)v) * sin((double)v));
+    }
+    return (int)ret;
+}
+
+/* Compare out[][] against a sequential recomputation; prints the first
+ * mismatch, or a success message if there is none. */
+void check(void)
+{
+    int i, j;
+    for (i = 0; i < NUM; i++) {
+        for (j = 0; j < NUM; j++) {
+            int expected = comp(in[i][j]);
+            if (out[i][j] != expected) {
+                printf("out[%d][%d]=%d expected=%d\n", i, j, out[i][j], expected);
+                return;
+            }
+        }
+    }
+    printf("Verification: SUCCESS\n");
+}
+
+
+static ABT_pool *g_pools;
+
+/* Work item of one inner-loop ULT: columns [start, end) of row x. */
+typedef struct {
+    int start;
+    int end;
+    int x;
+} vector_scal_args_t;
+
+/* Work item of one outer-loop ULT: rows [start, end), each split into
+ * nxstreams column blocks of a row of length it. */
+typedef struct {
+    int nxstreams;
+    int it;
+    int start;
+    int end;
+} vector_scal_task_args_t;
+
+/* malloc() that terminates the program instead of returning NULL. */
+static void *xmalloc(size_t size)
+{
+    void *p = malloc(size);
+    if (p == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
+/* Parse `text` as a strictly positive int; print an error and exit if it is
+ * not one.  Zero and negative values are rejected because they are later used
+ * as divisors and allocation sizes. */
+static int parse_positive_int(const char *text, const char *what)
+{
+    char *end;
+    long v;
+
+    errno = 0;
+    v = strtol(text, &end, 10);
+    if (end == text || *end != '\0' || errno == ERANGE || v < 1 ||
+        v > INT_MAX) {
+        fprintf(stderr, "invalid %s '%s': expected a positive integer\n",
+                what, text);
+        exit(EXIT_FAILURE);
+    }
+    return (int)v;
+}
+
+/* Inner-loop body: out[x][j] = comp(in[x][j]) for j in [start, end). */
+void vector_scal(void *arguments)
+{
+    int j;
+    vector_scal_args_t *arg;
+    arg = (vector_scal_args_t *)arguments;
+
+    int mystart = arg->start;
+    int myend = arg->end;
+    int x = arg->x;
+
+    for (j = mystart; j < myend; j++) {
+        out[x][j] = comp(in[x][j]);
+    }
+}
+
+/* Outer-loop body.  For every row in [arg->start, arg->end) the row is split
+ * into arg->nxstreams static column blocks.  Block 0 is computed by the
+ * caller itself; the other blocks run as ULTs in the pool indexed by the rank
+ * of the calling execution stream and are joined before the next row. */
+void vector_scal_launch(void *arguments)
+{
+    int i, it, j, num_ults, rank, mystart, myend, p;
+    ABT_thread *threads;
+    vector_scal_task_args_t *arg;
+    arg = (vector_scal_task_args_t *) arguments;
+    vector_scal_args_t *args;
+    it = arg->it;
+    num_ults = arg->nxstreams;
+    mystart = arg->start;
+    myend = arg->end;
+
+    args = (vector_scal_args_t *)xmalloc(sizeof(vector_scal_args_t)
+                                         * (size_t)num_ults);
+
+    threads = (ABT_thread *)xmalloc(sizeof(ABT_thread) * (size_t)num_ults);
+
+    int bloc = it / (num_ults);
+    int rest = it % (num_ults);
+    ABT_xstream_self_rank(&rank);
+    for (i = mystart; i < myend; i++) {
+        int start = 0;
+        int end = 0;
+        for (j = 0; j < num_ults; j++) {
+            start = end;
+            int inc = (j < rest) ? 1 : 0;
+            end += bloc + inc;
+            args[j].start = start;
+            args[j].end = end;
+            args[j].x = i;
+
+            if (j > 0) {
+                ABT_thread_create(g_pools[rank], vector_scal,
+                                  (void *)&args[j], ABT_THREAD_ATTR_NULL,
+                                  &threads[j]);
+            }
+        }
+        vector_scal((void *)&args[0]);
+        for (p = 1; p < num_ults; p++) {
+            ABT_thread_free(&threads[p]);
+        }
+    }
+
+    free(threads);
+    free(args);
+}
+
+
+/* Usage: prog [num_xstreams [inner_threads [repetitions]]]
+ * Prints "<xstreams> <inner> <average seconds per repetition>" followed by
+ * the verification result. */
+int main(int argc, char *argv[])
+{
+    int i, j, r;
+    int num_xstreams;
+    ABT_xstream *xstreams;
+    ABT_thread *threads;
+    vector_scal_task_args_t *args;
+    int inner_xstreams;
+    double *times, avg_time = 0.0;
+
+    num_xstreams = (argc > 1)
+        ? parse_positive_int(argv[1], "number of execution streams")
+        : NUM_XSTREAMS;
+    inner_xstreams = (argc > 2)
+        ? parse_positive_int(argv[2], "number of inner threads")
+        : NUM_XSTREAMS;
+    int rep = (argc > 3)
+        ? parse_positive_int(argv[3], "number of repetitions")
+        : NUM_REPS;
+    times = (double *)xmalloc(sizeof(double) * (size_t)rep);
+
+    init();
+
+    g_pools = (ABT_pool *)xmalloc(sizeof(ABT_pool) * (size_t)num_xstreams);
+    xstreams = (ABT_xstream *)xmalloc(sizeof(ABT_xstream) * (size_t)num_xstreams);
+    threads = (ABT_thread *)xmalloc(sizeof(ABT_thread) * (size_t)num_xstreams);
+    args = (vector_scal_task_args_t *)xmalloc(sizeof(vector_scal_task_args_t)
+                                              * (size_t)num_xstreams);
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    for (i = 0; i < num_xstreams; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &g_pools[i]);
+    }
+
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &g_pools[0]);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+
+    /* Each task is created on the xstream which is going to execute it */
+
+    for (r = 0; r < rep; r++) {
+        times[r] = ABT_get_wtime();
+
+        int bloc = NUM / (num_xstreams);
+        int rest = NUM % (num_xstreams);
+        int start = 0;
+        int end = 0;
+
+        for (j = 0; j < num_xstreams; j++) {
+            start = end;
+            int inc = (j < rest) ? 1 : 0;
+            end += bloc + inc;
+            args[j].start = start;
+            args[j].end = end;
+            args[j].it = NUM;
+            args[j].nxstreams = inner_xstreams;
+            if (j > 0) {
+                ABT_thread_create(g_pools[j], vector_scal_launch,
+                                  (void *)&args[j], ABT_THREAD_ATTR_NULL,
+                                  &threads[j]);
+            }
+        }
+        /* The primary ULT processes the first block of rows itself. */
+        vector_scal_launch((void *)&args[0]);
+
+        for (j = 1; j < num_xstreams; j++) {
+            ABT_thread_free(&threads[j]);
+        }
+
+        times[r] = ABT_get_wtime() - times[r];
+        avg_time += times[r];
+    }
+    avg_time /= rep;
+    printf("%d %d %f\n", num_xstreams, inner_xstreams, avg_time);
+    check();
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    ABT_finalize();
+
+    free(g_pools);
+    free(xstreams);
+    free(threads);
+    free(args);
+    free(times);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/nested_parallel_for_block_omp.c b/bolt/examples/argobots/nested_parallel_for_block_omp.c
new file mode 100644
index 0000000000000..df84555f855ea
--- /dev/null
+++ b/bolt/examples/argobots/nested_parallel_for_block_omp.c
@@ -0,0 +1,105 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
/*
 * Benchmark driver for nested "#pragma omp parallel for" regions.
 *
 * Usage: prog [inner_threads [reps]]
 *   inner_threads  value passed to omp_set_num_threads() before each call to
 *                  petsc_voodoo() (default: team size of a plain parallel
 *                  region, queried below)
 *   reps           number of timed repetitions (default NUM_REPS)
 *
 * Prints "<outer threads> <inner threads> <average seconds>" and then the
 * result of check().
 */
int main(int argc, char *argv[])
{
    int i, j, r, nthreads;
    double *time, avg_time = 0.0;

    /* Open a parallel region only to learn how large the default team is;
     * the master thread records the size. */
    #pragma omp parallel
    {
        #pragma omp master
        {
            nthreads = omp_get_num_threads();
        }
    }
    int in_th = (argc > 1) ? atoi(argv[1]) : nthreads;
    int rep = (argc > 2) ? atoi(argv[2]) : NUM_REPS;
    /* One slot per repetition; each slot first holds the start timestamp and
     * is then overwritten with the elapsed time. */
    time = (double *)malloc(sizeof(double) * rep);
    init();
    for (r = 0; r < rep; r++) {
        time[r] = omp_get_wtime();

        /* Outer loop over rows; petsc_voodoo() opens its own parallel for
         * over the columns of row i. */
        #pragma omp parallel for
        for (i = 0; i < NUM; i++) {
            /* Request in_th threads for the parallel region encountered
             * next by this thread.  NOTE(review): whether that inner region
             * really runs with more than one thread depends on the
             * runtime's nesting settings -- confirm for the target runtime. */
            omp_set_num_threads(in_th);
            petsc_voodoo(i);
        }
        time[r] = omp_get_wtime() - time[r];
        avg_time += time[r];
    }
    avg_time /= rep;
    printf("%d %d %f\n", nthreads, in_th, avg_time);
    check();

    free(time);

    return EXIT_SUCCESS;
}
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define NUM_ELEMS 5017600 /* 2GB */ +#define NUM_XSTREAMS 4 +#define NUM_REPS 1 + +ABT_pool *g_pools; + +typedef struct { + int start; + int end; +} vector_scal_args_t; + +typedef struct { + int nxstreams; + int it; + int start; + int end; +} vector_scal_task_args_t; + +void exe_random(void *arguments) +{ + int i, k; + vector_scal_args_t *arg; + arg = (vector_scal_args_t *) arguments; + int mystart = arg->start; + int myend = arg->end; + for (i = mystart; i < myend; i++) { + int random = rand() % 10000; + int kk = 0; + for (k = 0; k < random; k++) + kk++; + assert(kk == random); + } +} + +void random_launch(void *arguments) +{ + int i, it, j, num_ults, rank, mystart, myend, p; +#ifdef PROFTIME + struct timeval t_start, t_end; + struct timeval t_start2, t_end2; + double time, time2; +#endif + ABT_task *tasks; + vector_scal_task_args_t *arg; + arg = (vector_scal_task_args_t *) arguments; + vector_scal_args_t *args; + it = arg->it; + num_ults = arg->nxstreams; + mystart = arg->start; + myend = arg->end; + int current = 0; + + args = (vector_scal_args_t *) malloc(sizeof(vector_scal_args_t) + * num_ults); + tasks = (ABT_task *)malloc(sizeof(ABT_task) * num_ults); + + int bloc = it / (num_ults); + int rest = it % (num_ults); + int start = 0; + int end = 0; + ABT_xstream_self_rank(&rank); +#ifdef PROFTIME + gettimeofday(&t_start, NULL); +#endif + for (i = mystart; i < myend; i++) { + for (j = 0; j < num_ults; j++) { + start = end; + int inc = (j < rest) ? 
1 : 0; + end += bloc + inc; + args[j].start = start; + args[j].end = end; + +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + ABT_task_create(g_pools[rank], exe_random, + (void *)&args[j], &tasks[j]); +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("Inner_task_creation_time %f\n", (time2 / 1000000.0)); +#endif + } + current++; +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + for (p = 0; p < num_ults; p++) { + ABT_task_free(&tasks[p]); + } +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("Inner_join_time %f\n", (time2 / 1000000.0)); +#endif + } +#ifdef PROFTIME + gettimeofday(&t_end, NULL); + time = (t_end.tv_sec * 1000000 + t_end.tv_usec) - + (t_start.tv_sec * 1000000 + t_start.tv_usec); + printf("ult_time %f\n", (time2 / 1000000.0)); +#endif +} + + +int main(int argc, char *argv[]) +{ + int i, j; + int ntasks; + int num_xstreams; + char *str, *endptr; + ABT_xstream *xstreams; + vector_scal_task_args_t *args; + + struct timeval t_start, t_end; +#ifdef PROFTIME + struct timeval t_start2, t_end2; + double time2; +#endif + int it; + int inner_xstreams; + srand(1983); + num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS; + if (argc > 2) { + str = argv[2]; + } + ntasks = argc > 2 ? strtoll(str, &endptr, 10) : NUM_ELEMS; + it = ceil(sqrt(ntasks)); + ntasks = it * it; + inner_xstreams = argc > 3 ? 
atoi(argv[3]) : NUM_XSTREAMS; + g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); + + xstreams = (ABT_xstream *) malloc(sizeof(ABT_xstream) * num_xstreams); + args = (vector_scal_task_args_t *) malloc(sizeof(vector_scal_task_args_t) + * num_xstreams); + + /* initialization */ + ABT_init(argc, argv); + for (i = 0; i < num_xstreams; i++) { + ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, + &g_pools[i]); + } + + /* ES creation */ + ABT_xstream_self(&xstreams[0]); + ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, + 1, &g_pools[0]); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i], + ABT_SCHED_CONFIG_NULL, &xstreams[i]); + ABT_xstream_start(xstreams[i]); + } + + gettimeofday(&t_start, NULL); + + /* Each task is created on the xstream which is going to execute it */ + + int bloc = it / (num_xstreams); + int rest = it % (num_xstreams); + int start = 0; + int end = 0; + for (j = 0; j < num_xstreams; j++) { + start = end; + int inc = (j < rest) ? 
1 : 0; + end += bloc + inc; + args[j].start = start; + args[j].end = end; + args[j].it = it; + args[j].nxstreams = inner_xstreams; +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + ABT_thread_create_on_xstream(xstreams[j], random_launch, + (void *)&args[j], ABT_THREAD_ATTR_NULL, + NULL); +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("ULT creation time %f\n", time2 / 1000000.0); +#endif + } + + ABT_thread_yield(); + +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + for (i = 0; i < num_xstreams; i++) { + size_t size; + while (1) { + ABT_pool_get_size(g_pools[i], &size); + if (size == 0) break; + ABT_thread_yield(); + } + } +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("Join time %f\n", time2 / 1000000.0); + +#endif + + gettimeofday(&t_end, NULL); + double time = (t_end.tv_sec * 1000000 + t_end.tv_usec) - + (t_start.tv_sec * 1000000 + t_start.tv_usec); + + + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_join(xstreams[i]); + ABT_xstream_free(&xstreams[i]); + } + printf("%d %d %d %f\n", + num_xstreams, inner_xstreams, ntasks, time / 1000000.0); + + ABT_finalize(); + free(xstreams); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/nested_parallel_for_irregular_abt_thread.c b/bolt/examples/argobots/nested_parallel_for_irregular_abt_thread.c new file mode 100644 index 0000000000000..ae7f75ce9c92c --- /dev/null +++ b/bolt/examples/argobots/nested_parallel_for_irregular_abt_thread.c @@ -0,0 +1,245 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* This code mimics the parallel for OpenMP directive in nested loops. 
+ * It creates as many streams as user requires and threads are created and + * assigned by static blocs to each stream for the outer loop. + * For the inner loop, as many threads as the user requires are created. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define NUM_ELEMS 5017600 /* 2GB */ +#define NUM_XSTREAMS 4 +#define NUM_REPS 1 + +ABT_pool *g_pools; + +typedef struct { + int start; + int end; +} vector_scal_args_t; + +typedef struct { + int nxstreams; + int it; + int start; + int end; +} vector_scal_task_args_t; + +void exe_random(void *arguments) +{ + int i, k; + vector_scal_args_t *arg; + arg = (vector_scal_args_t *) arguments; + int mystart = arg->start; + int myend = arg->end; + for (i = mystart; i < myend; i++) { + int random = rand() % 10000; + int kk = 0; + for (k = 0; k < random; k++) + kk++; + assert(kk == random); + } +} + +void random_launch(void *arguments) +{ + int i, it, j, num_ults, rank, mystart, myend, p; +#ifdef PROFTIME + struct timeval t_start, t_end; + struct timeval t_start2, t_end2; + double time, time2; +#endif + ABT_thread *threads; + vector_scal_task_args_t *arg; + arg = (vector_scal_task_args_t *) arguments; + vector_scal_args_t *args; + it = arg->it; + num_ults = arg->nxstreams; + mystart = arg->start; + myend = arg->end; + int current = 0; + args = (vector_scal_args_t *) malloc(sizeof(vector_scal_args_t) + * num_ults); + threads = (ABT_thread *)malloc(sizeof(ABT_thread) * num_ults); + + int bloc = it / (num_ults); + int rest = it % (num_ults); + int start = 0; + int end = 0; + ABT_xstream_self_rank(&rank); +#ifdef PROFTIME + gettimeofday(&t_start, NULL); +#endif + for (i = mystart; i < myend; i++) { + for (j = 0; j < num_ults; j++) { + start = end; + int inc = (j < rest) ? 
1 : 0; + end += bloc + inc; + args[j].start = start; + args[j].end = end; + +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + ABT_thread_create(g_pools[rank], exe_random, + (void *)&args[j], ABT_THREAD_ATTR_NULL, + &threads[j]); +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("Inner_ults_creation_time %f\n", (time2 / 1000000.0)); +#endif + } + current++; +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + for (p = 0; p < num_ults; p++) { + ABT_thread_free(&threads[p]); + } +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("Inner_join_time %f\n", (time2 / 1000000.0)); +#endif + } +#ifdef PROFTIME + gettimeofday(&t_end, NULL); + time = (t_end.tv_sec * 1000000 + t_end.tv_usec) - + (t_start.tv_sec * 1000000 + t_start.tv_usec); + printf("ult_time %f\n", (time / 1000000.0)); +#endif +} + + +int main(int argc, char *argv[]) +{ + int i, j; + int ntasks; + int num_xstreams; + char *str, *endptr; + ABT_xstream *xstreams; + vector_scal_task_args_t *args; + + struct timeval t_start, t_end; +#ifdef PROFTIME + struct timeval t_start2, t_end2; + double time2; +#endif + int it; + int inner_xstreams; + srand(1983); + num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS; + if (argc > 2) { + str = argv[2]; + } + ntasks = argc > 2 ? strtoll(str, &endptr, 10) : NUM_ELEMS; + it = ceil(sqrt(ntasks)); + ntasks = it * it; + inner_xstreams = argc > 3 ? 
atoi(argv[3]) : NUM_XSTREAMS; + g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); + + xstreams = (ABT_xstream *) malloc(sizeof(ABT_xstream) * num_xstreams); + args = (vector_scal_task_args_t *) malloc(sizeof(vector_scal_task_args_t) + * num_xstreams); + + /* initialization */ + ABT_init(argc, argv); + for (i = 0; i < num_xstreams; i++) { + ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, + &g_pools[i]); + } + + /* ES creation */ + ABT_xstream_self(&xstreams[0]); + ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, + 1, &g_pools[0]); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i], + ABT_SCHED_CONFIG_NULL, &xstreams[i]); + ABT_xstream_start(xstreams[i]); + } + + gettimeofday(&t_start, NULL); + + /* Each task is created on the xstream which is going to execute it */ + + int bloc = it / (num_xstreams); + int rest = it % (num_xstreams); + int start = 0; + int end = 0; + for (j = 0; j < num_xstreams; j++) { + start = end; + int inc = (j < rest) ? 
1 : 0; + end += bloc + inc; + args[j].start = start; + args[j].end = end; + args[j].it = it; + args[j].nxstreams = inner_xstreams; +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + ABT_thread_create_on_xstream(xstreams[j], random_launch, + (void *)&args[j], ABT_THREAD_ATTR_NULL, + NULL); +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("ULT creation time %f\n", time2 / 1000000.0); +#endif + } + + ABT_thread_yield(); + +#ifdef PROFTIME + gettimeofday(&t_start2, NULL); +#endif + for (i = 0; i < num_xstreams; i++) { + size_t size; + while (1) { + ABT_pool_get_size(g_pools[i], &size); + if (size == 0) break; + ABT_thread_yield(); + } + } +#ifdef PROFTIME + gettimeofday(&t_end2, NULL); + time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) - + (t_start2.tv_sec * 1000000 + t_start2.tv_usec); + printf("Join time %f\n", time2 / 1000000.0); + +#endif + + gettimeofday(&t_end, NULL); + double time = (t_end.tv_sec * 1000000 + t_end.tv_usec) - + (t_start.tv_sec * 1000000 + t_start.tv_usec); + + + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_join(xstreams[i]); + ABT_xstream_free(&xstreams[i]); + } + printf("%d %d %d %f\n", + num_xstreams, inner_xstreams, ntasks, time / 1000000.0); + + ABT_finalize(); + free(xstreams); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/nested_parallel_for_irregular_omp.c b/bolt/examples/argobots/nested_parallel_for_irregular_omp.c new file mode 100644 index 0000000000000..e2fa6d39be5dc --- /dev/null +++ b/bolt/examples/argobots/nested_parallel_for_irregular_omp.c @@ -0,0 +1,66 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. 
+ */ + +/* Nested Pragma omp parallel for directive evaluation + * Output: avg time + */ + +#include +#include +#include +#include +#include +#include + +#define NUM_ELEMS 5017600 /* 2GB */ +#define NUM_REPS 1 + +int main(int argc, char *argv[]) +{ + int i, j, r, nthreads; + double *time, avg_time = 0.0; + + #pragma omp parallel + { + #pragma omp master + { + nthreads = omp_get_num_threads(); + } + } + int n = (argc > 1) ? atoi(argv[1]) : NUM_ELEMS; + int in_th = (argc > 2) ? atoi(argv[2]) : nthreads; + int rep = (argc > 3) ? atoi(argv[3]) : 3; + int it = ceil(sqrt((double)n)); + srand(1983); + + n = it * it; + time = (double *)malloc(sizeof(double) * rep); + for (r = 0; r < rep; r++) { + time[r] = omp_get_wtime(); + #pragma omp parallel for + for (j = 0; j < it; j++) { + omp_set_num_threads(in_th); + #pragma omp parallel for + for (i = 0; i < it; i++) { + int random = rand() % 10000; + volatile int kk = 0; + int k; + for (k = 0; k < random; k++) + kk++; + assert(kk == random); + } + } + time[r] = omp_get_wtime() - time[r]; + avg_time += time[r]; + } + + avg_time /= rep; + printf("%d %d %d %f\n", nthreads, in_th, n, avg_time); + + free(time); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/nested_parallel_for_omp.c b/bolt/examples/argobots/nested_parallel_for_omp.c new file mode 100644 index 0000000000000..97fef26ff9c30 --- /dev/null +++ b/bolt/examples/argobots/nested_parallel_for_omp.c @@ -0,0 +1,83 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. 
+ */ + +/* Nested Pragma omp parallel for directives evaluation + * Output: avg time + */ + +#include +#include +#include +#include +#include +#define NUM_ELEMS 5017600 /* 2GB */ +#define NUM_REPS 1 + +/* Vector initialization */ +void init(float *v, int n) +{ + int i = 0; + for (i = 0; i < n; i++) { + v[i] = i + 100.0f; + } +} + +/* Called after each test to be sure that the compiler does + not avoid to execute the test */ +void check(float *v, int n) +{ + int i = 0; + for (i = 0; i < n; i++) { + if (v[i] != (i + 100.0f) * 0.9f) { + printf("v[%d]<=0.0f\n", i); + } + } +} + +int main(int argc, char *argv[]) +{ + int i, j, r, nthreads; + double *time, avg_time = 0.0; + float *v; + + #pragma omp parallel + { + #pragma omp master + { + nthreads = omp_get_num_threads(); + } + } + int n = (argc > 1) ? atoi(argv[1]) : NUM_ELEMS; + int in_th = (argc > 2) ? atoi(argv[2]) : nthreads; + int rep = (argc > 3) ? atoi(argv[3]) : NUM_REPS; + int it = ceil(sqrt((double)n)); + n = it * it; + time = (double *)malloc(sizeof(double) * rep); + v = (float *)malloc(sizeof(float) * n); + init(v, n); + for (r = 0; r < rep; r++) { + time[r] = omp_get_wtime(); + + #pragma omp parallel for + for (j = 0; j < it; j++) { + omp_set_num_threads(in_th); + #pragma omp parallel for + for (i = 0; i < it; i++) { + v[j * it + i] *= 0.9f; + } + } + time[r] = omp_get_wtime() - time[r]; + avg_time += time[r]; + } + avg_time /= rep; + check(v, n); + printf("%d %d %d %f\n", nthreads, in_th, n, avg_time); + + free(time); + free(v); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/parallel_for_abt_task.c b/bolt/examples/argobots/parallel_for_abt_task.c new file mode 100644 index 0000000000000..9e5ead4e30509 --- /dev/null +++ b/bolt/examples/argobots/parallel_for_abt_task.c @@ -0,0 +1,142 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* parallel_for_abt_task.c code mimics the parallel for OpenMP directive. 
It + * creates as many ESs as user requires, and tasks are created and assigned by + * static blocks to each ES. + */ + +#include +#include +#include +#include +#include +#include + +#define NUM_ELEMS 5017600 /* 2GB */ +#define NUM_XSTREAMS 4 +#define NUM_REPS 1 + +ABT_pool *g_pools; + +typedef struct { + float *ptr; + float value; + int start; + int end; +} vector_scal_args_t; + + +void vector_scal(void *arguments) +{ + int i; + vector_scal_args_t *arg; + arg = (vector_scal_args_t *)arguments; + int mystart = arg->start; + int myend = arg->end; + float value = arg->value; + float *ptr = arg->ptr; + for (i = mystart; i < myend; i++) { + ptr[i] *= value; + } +} + + +int main(int argc, char *argv[]) +{ + int i, j; + int ntasks; + int num_xstreams; + char *str, *endptr; + ABT_xstream *xstreams; + vector_scal_args_t *args; + struct timeval t_start, t_end; + float *a; + ABT_task *tasks; + num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS; + if (argc > 2) { + str = argv[2]; + } + ntasks = argc > 2 ? 
strtoll(str, &endptr, 10) : NUM_ELEMS; + g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); + tasks = (ABT_task *)malloc(sizeof(ABT_task) * num_xstreams); + a = malloc(sizeof(float) * ntasks); + for (i = 0; i < ntasks; i++) { + a[i] = i * 1.0f; + } + + xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams); + args = (vector_scal_args_t *)malloc(sizeof(vector_scal_args_t) + * num_xstreams); + + /* initialization */ + ABT_init(argc, argv); + for (i = 0; i < num_xstreams; i++) { + ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, + &g_pools[i]); + } + + /* ES creation */ + ABT_xstream_self(&xstreams[0]); + ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, + 1, &g_pools[0]); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i], + ABT_SCHED_CONFIG_NULL, &xstreams[i]); + ABT_xstream_start(xstreams[i]); + } + + gettimeofday(&t_start, NULL); + + /* Each task is created on the xstream which is going to execute it */ + + int bloc = ntasks / (num_xstreams); + int rest = ntasks % (num_xstreams); + int start = 0; + int end = 0; + for (j = 0; j < num_xstreams; j++) { + start = end; + int inc = (j < rest) ? 
1 : 0; + end += bloc + inc; + args[j].start = start; + args[j].end = end; + args[j].value = 0.9f; + args[j].ptr = a; + ABT_task_create_on_xstream(xstreams[j], vector_scal, + (void *)&args[j], &tasks[j]); + } + + ABT_thread_yield(); + + for (i = 0; i < num_xstreams; i++) { + ABT_task_free(&tasks[i]); + } + + gettimeofday(&t_end, NULL); + double time = (t_end.tv_sec * 1000000 + t_end.tv_usec) - + (t_start.tv_sec * 1000000 + t_start.tv_usec); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_join(xstreams[i]); + } + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_free(&xstreams[i]); + } + printf("%d %d %f\n", num_xstreams, ntasks, time / 1000000.0); + + ABT_finalize(); + free(xstreams); + for (i = 0; i < ntasks; i++) { + if (a[i] != i * 0.9f) { + printf("%f\n", a[i]); + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/parallel_for_abt_thread.c b/bolt/examples/argobots/parallel_for_abt_thread.c new file mode 100644 index 0000000000000..5c4eaae1059c5 --- /dev/null +++ b/bolt/examples/argobots/parallel_for_abt_thread.c @@ -0,0 +1,144 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* parallel_for_abt_thread.c code mimics the parallel for OpenMP directive. + * It creates as many ESs as user requires, and tasks are created and assigned + * by static blocks to each ES. 
+ */ + +#include +#include +#include +#include +#include +#include + +#define NUM_ELEMS 5017600 /* 2GB */ +#define NUM_XSTREAMS 4 +#define NUM_REPS 1 + +ABT_pool *g_pools; + +typedef struct { + float *ptr; + float value; + int start; + int end; +} vector_scal_args_t; + + +void vector_scal(void *arguments) +{ + int i; + vector_scal_args_t *arg; + arg = (vector_scal_args_t *)arguments; + int mystart = arg->start; + int myend = arg->end; + float value = arg->value; + float *ptr = arg->ptr; + for (i = mystart; i < myend; i++) { + ptr[i] *= value; + } +} + + +int main(int argc, char *argv[]) +{ + int i, j; + int ntasks; + int num_xstreams; + char *str, *endptr; + ABT_xstream *xstreams; + vector_scal_args_t *args; + struct timeval t_start, t_end; + float *a; + ABT_thread *threads; + num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS; + if (argc > 2) { + str = argv[2]; + } + ntasks = argc > 2 ? strtoll(str, &endptr, 10) : NUM_ELEMS; + g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); + threads = (ABT_thread *)malloc(sizeof(ABT_thread) * num_xstreams); + + a = malloc(sizeof(float) * ntasks); + for (i = 0; i < ntasks; i++) { + a[i] = i * 1.0f; + } + + xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams); + args = (vector_scal_args_t *)malloc(sizeof(vector_scal_args_t) + * num_xstreams); + + /* initialization */ + ABT_init(argc, argv); + for (i = 0; i < num_xstreams; i++) { + ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, + &g_pools[i]); + } + + /* ES creation */ + ABT_xstream_self(&xstreams[0]); + ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, + 1, &g_pools[0]); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i], + ABT_SCHED_CONFIG_NULL, &xstreams[i]); + ABT_xstream_start(xstreams[i]); + } + + gettimeofday(&t_start, NULL); + + /* Each task is created on the xstream which is going to execute it */ + + int bloc = ntasks / (num_xstreams); + int rest = 
ntasks % (num_xstreams); + int start = 0; + int end = 0; + for (j = 0; j < num_xstreams; j++) { + start = end; + int inc = (j < rest) ? 1 : 0; + end += bloc + inc; + args[j].start = start; + args[j].end = end; + args[j].value = 0.9f; + args[j].ptr = a; + ABT_thread_create_on_xstream(xstreams[j], vector_scal, + (void *)&args[j], ABT_THREAD_ATTR_NULL, + &threads[j]); + } + + ABT_thread_yield(); + + for (i = 0; i < num_xstreams; i++) { + ABT_thread_free(&threads[i]); + } + + gettimeofday(&t_end, NULL); + double time = (t_end.tv_sec * 1000000 + t_end.tv_usec) - + (t_start.tv_sec * 1000000 + t_start.tv_usec); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_join(xstreams[i]); + } + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_free(&xstreams[i]); + } + printf("%d %d %f\n", num_xstreams, ntasks, time / 1000000.0); + + ABT_finalize(); + free(xstreams); + for (i = 0; i < ntasks; i++) { + if (a[i] != i * 0.9f) { + printf("%f\n", a[i]); + return EXIT_FAILURE; + } + } + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/parallel_for_omp.c b/bolt/examples/argobots/parallel_for_omp.c new file mode 100644 index 0000000000000..2b260cf2dae5c --- /dev/null +++ b/bolt/examples/argobots/parallel_for_omp.c @@ -0,0 +1,78 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. 
+ */ + +/* Pragma omp parallel for directive evaluation + * Output: avg time + */ + +#include +#include +#include +#include +#include +#define NUM_ELEMS 5017600 /* 2GB */ +#define EXT_LOOP_ELEM 2 /* 2GB */ +#define IN_LOOP_ELEM 2 /* 2GB */ +#define IN_LOOP_TH 1 /* 2GB */ +#define NUM_REPS 1 + +/* Vector initialization */ +void init(float *v, int n) +{ + int i = 0; + for (i = 0; i < n; i++) { + v[i] = i + 100.0f; + } +} + +/* Called after each test to be sure that the compiler does + not avoid to execute the test */ +void check(float *v, int n) +{ + int i = 0; + for (i = 0; i < n; i++) { + if (v[i] != (i + 100.0f) * 0.9f) { + printf("v[%d]<=0.0f\n", i); + } + } +} + +int main(int argc, char *argv[]) +{ + int i, r, nthreads; + double *time, avg_time = 0.0; + float *v; + + #pragma omp parallel + { + #pragma omp master + { + nthreads = omp_get_num_threads(); + } + } + int n = (argc > 1) ? atoi(argv[1]) : NUM_ELEMS; + int rep = (argc > 2) ? atoi(argv[2]) : 1; + time = (double *)malloc(sizeof(double) * rep); + v = (float *)malloc(sizeof(float) * n); + init(v, n); + for (r = 0; r < rep; r++) { + time[r] = omp_get_wtime(); + #pragma omp parallel for + for (i = 0; i < n; i++) { + v[i] *= 0.9f; + } + time[r] = omp_get_wtime() - time[r]; + avg_time += time[r]; + } + avg_time /= rep; + check(v, n); + printf("%d %d %f\n", nthreads, n, avg_time); + + free(time); + free(v); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/task_multiple_producer_abt_task.c b/bolt/examples/argobots/task_multiple_producer_abt_task.c new file mode 100644 index 0000000000000..7da53b06d93cb --- /dev/null +++ b/bolt/examples/argobots/task_multiple_producer_abt_task.c @@ -0,0 +1,163 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* This code creates one task for each argobots xstream and each task creates + * a number of tasks. This version uses as many pools as execution streams are + * created. 
/*
 * Leaf work unit: scales the single float that `args` points to by 0.9f,
 * in place.
 */
void task_function(void *args)
{
    float *elem = args;

    *elem *= 0.9f;
}
strtoll(str, &endptr, 10) : NUM_TASKS; + if (ntasks < num_xstreams) { + ntasks = num_xstreams; + } + + printf("# of ESs: %d\n", num_xstreams); + + xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams); + args = (vector_scal_task_args_t *)malloc(sizeof(vector_scal_task_args_t) + * num_xstreams); + g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); + + a = malloc(sizeof(float) * ntasks); + + for (i = 0; i < ntasks; i++) { + a[i] = i + 100.0f; + } + + /* initialization */ + ABT_init(argc, argv); + + for (i = 0; i < num_xstreams; i++) { + ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, + &g_pools[i]); + } + + /* ES creation */ + ABT_xstream_self(&xstreams[0]); + ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, + 1, &g_pools[0]); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i], + ABT_SCHED_CONFIG_NULL, &xstreams[i]); + ABT_xstream_start(xstreams[i]); + } + + /* Work here */ + start = end = 0; + int bloc = ntasks / num_xstreams; + int rest = ntasks % num_xstreams; + gettimeofday(&t_start, NULL); + for (j = 0; j < num_xstreams; j++) { + start = end; + end = start + bloc; + if (j < rest) { + end++; + } + args[j].ptr = a; + args[j].value = 0.9f; + args[j].start = start; + args[j].end = end; + args[j].id = j; + ABT_task_create_on_xstream(xstreams[j], task_creator, + (void *)&args[j], NULL); + } + gettimeofday(&t_end2, NULL); + + for (i = 0; i < num_xstreams; i++) { + size_t size; + do { + ABT_thread_yield(); + ABT_pool_get_size(g_pools[i], &size); + } while (size != 0); + } + + gettimeofday(&t_end, NULL); + + for (i = 0; i < ntasks; i++) { + if (a[i] != (i + 100.0f) * 0.9f) { + printf("error: a[%d]\n", i); + } + } + + double time = (t_end.tv_sec * 1000000 + t_end.tv_usec) + - (t_start.tv_sec * 1000000 + t_start.tv_usec); + double time2 = (t_end2.tv_sec * 1000000 + t_end2.tv_usec) + - (t_start.tv_sec * 1000000 + t_start.tv_usec); + + printf("nxstreams: 
%d\nntasks %d\nTime(s): %f\n", + num_xstreams, ntasks, time / 1000000.0); + /* join ESs */ + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_join(xstreams[i]); + ABT_xstream_free(&xstreams[i]); + } + printf("Creation time=%f\n", time2 / 1000000.0); + ABT_finalize(); + + free(xstreams); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/task_multiple_producer_abt_thread.c b/bolt/examples/argobots/task_multiple_producer_abt_thread.c new file mode 100644 index 0000000000000..f3a380d436209 --- /dev/null +++ b/bolt/examples/argobots/task_multiple_producer_abt_thread.c @@ -0,0 +1,163 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* This code creates one task for each argobots xstream and each task creates + * a number of tasks. This version uses as many pools as execution streams are + * created. This number of tasks is the division between number of tasks + * required and number of streams. This code mimics the all producer all + * consumers system. + */ + +#include +#include +#include +#include +#include +#include + +#define NUM_TASKS 5000000 +#define NUM_XSTREAMS 4 +#define NUM_REPS 1 + +ABT_pool *g_pools; + +typedef struct { + float *ptr; + float value; + int start; + int end; + int id; +} vector_scal_task_args_t; + +void task_function(void *args) +{ + float *a; + a = (float *)args; + *a = *a * 0.9f; +} + +void task_creator(void *args) +{ + int i; + vector_scal_task_args_t *arg; + arg = (vector_scal_task_args_t *)args; + for (i = arg->start; i < arg->end; i++) { + ABT_thread_create(g_pools[arg->id], task_function, (void *)&arg->ptr[i], + ABT_THREAD_ATTR_NULL, NULL); + } +} + +int main(int argc, char *argv[]) +{ + int i, j; + int ntasks; + int start, end; + int num_xstreams; + ABT_xstream *xstreams; + vector_scal_task_args_t *args; + struct timeval t_start, t_end, t_end2; + char *str, *endptr; + float *a; + + num_xstreams = argc > 1 ? 
atoi(argv[1]) : NUM_XSTREAMS; + if (argc > 2) { + str = argv[2]; + } + + ntasks = argc > 2 ? strtoll(str, &endptr, 10) : NUM_TASKS; + if (ntasks < num_xstreams) { + ntasks = num_xstreams; + } + + printf("# of ESs: %d\n", num_xstreams); + + xstreams = (ABT_xstream *)malloc(sizeof(ABT_xstream) * num_xstreams); + args = (vector_scal_task_args_t *)malloc(sizeof(vector_scal_task_args_t) + * num_xstreams); + g_pools = (ABT_pool *)malloc(sizeof(ABT_pool) * num_xstreams); + + a = malloc(sizeof(float) * ntasks); + + for (i = 0; i < ntasks; i++) { + a[i] = i + 100.0f; + } + + /* initialization */ + ABT_init(argc, argv); + + for (i = 0; i < num_xstreams; i++) { + ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE, + &g_pools[i]); + } + + /* ES creation */ + ABT_xstream_self(&xstreams[0]); + ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT, + 1, &g_pools[0]); + + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i], + ABT_SCHED_CONFIG_NULL, &xstreams[i]); + ABT_xstream_start(xstreams[i]); + } + + /* Work here */ + start = end = 0; + int bloc = ntasks / num_xstreams; + int rest = ntasks % num_xstreams; + gettimeofday(&t_start, NULL); + for (j = 0; j < num_xstreams; j++) { + start = end; + end = start + bloc; + if (j < rest) { + end++; + } + args[j].ptr = a; + args[j].value = 0.9f; + args[j].start = start; + args[j].end = end; + args[j].id = j; + ABT_thread_create_on_xstream(xstreams[j], task_creator, + (void *)&args[j], ABT_THREAD_ATTR_NULL, + NULL); + } + gettimeofday(&t_end2, NULL); + + for (i = 0; i < num_xstreams; i++) { + size_t size; + do { + ABT_thread_yield(); + ABT_pool_get_size(g_pools[i], &size); + } while (size != 0); + } + + gettimeofday(&t_end, NULL); + + for (i = 0; i < ntasks; i++) { + if (a[i] != (i + 100.0f) * 0.9f) { + printf("error: a[%d]\n", i); + } + } + + double time = (t_end.tv_sec * 1000000 + t_end.tv_usec) + - (t_start.tv_sec * 1000000 + t_start.tv_usec); + double time2 = 
(t_end2.tv_sec * 1000000 + t_end2.tv_usec) + - (t_start.tv_sec * 1000000 + t_start.tv_usec); + + printf("nxstreams: %d\nntasks %d\nTime(s): %f\n", + num_xstreams, ntasks, time / 1000000.0); + /* join ESs */ + for (i = 1; i < num_xstreams; i++) { + ABT_xstream_join(xstreams[i]); + ABT_xstream_free(&xstreams[i]); + } + printf("Creation time=%f\n", time2 / 1000000.0); + ABT_finalize(); + + free(xstreams); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/task_multiple_producer_omp.c b/bolt/examples/argobots/task_multiple_producer_omp.c new file mode 100644 index 0000000000000..ef64c4109cba4 --- /dev/null +++ b/bolt/examples/argobots/task_multiple_producer_omp.c @@ -0,0 +1,90 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +#include +#include +#include +#include +#include + +#define NUM_TASKS 5000000 +#define NUM_REPS 1 +#define USLEEP usleep(100); + +/* Pragma omp task directive evaluation + * Output: avg time + */ + +void sscal(float value, float *a) +{ + *a = *a * value; +} + + +int main(int argc, char *argv[]) +{ + int i, r, nthreads; + double *time, avg_time = 0.0; + char *str, *endptr; + float *a; + + #pragma omp parallel + { + #pragma omp master + { + nthreads = omp_get_num_threads(); + } + } + + if (argc > 1) { + str = argv[1]; + } + + int ntasks = argc > 1 ? strtoll(str, &endptr, 10) : NUM_TASKS; + if (ntasks < nthreads) { + ntasks = nthreads; + } + + int rep = (argc > 2) ? 
atoi(argv[2]) : NUM_REPS; + + time = malloc(sizeof(double) * (rep + 1)); + + a = malloc(sizeof(float) * ntasks); + + for (i = 0; i < ntasks; i++) { + a[i] = i + 100.0f; + } + + for (r = 0; r < rep; r++) { + time[r] = omp_get_wtime(); + #pragma omp parallel + { + time[1] = omp_get_wtime(); + #pragma omp for + for (i = 0; i < ntasks; i++) { + #pragma omp task firstprivate(i) + { + sscal(0.9f, &a[i]); + } + } + time[1] = (omp_get_wtime() - time[1]); + } + time[r] = omp_get_wtime() - time[r]; + avg_time += time[r]; + } + for (i = 0; i < ntasks; i++) { + if (a[i] != (i + 100.0f) * 0.9f) { + printf("error: a[%d]=%.2f expected %.2f\n", i, + a[i], (i + 100.0f) * 0.9f); + } + } + + avg_time /= rep; + printf("nthreads: %d\nntasks: %d\nTime(s):%f\nCreation Time: %f\n", + nthreads, ntasks, avg_time, time[1]); + + return EXIT_SUCCESS; +} diff --git a/bolt/examples/argobots/task_nested_abt_task.c b/bolt/examples/argobots/task_nested_abt_task.c new file mode 100644 index 0000000000000..694fb5a67085a --- /dev/null +++ b/bolt/examples/argobots/task_nested_abt_task.c @@ -0,0 +1,137 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +/* This code creates all tasks from the main ES but using as many pools as + * xstreams and they are executed by all the xstreams. This code mimics + * the 1 producers all consumers system. 
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 50000
+#define NUM_XSTREAMS 4
+#define NUM_REPS 1
+
+ABT_pool *g_pools;
+int num_pools;
+int num_xstreams;
+/* Both counters are updated by tasklets that run on several ESs at the same
+ * time, so plain int increments would lose updates. */
+_Atomic int pool_for_task = 0;
+_Atomic int o = 0;
+
+/* Leaf tasklet: scale the float that `arguments` points to by 0.9 in place. */
+void vector_scal(void *arguments)
+{
+    float *a;
+    a = (float *)arguments;
+    *a = *a * 0.9f;
+}
+
+/* Leaf tasklet: count one execution in `o`; `arguments` is ignored. */
+void na(void *arguments)
+{
+    (void)arguments;
+    o++;
+}
+
+/* First-level tasklet: spawn one vector_scal and one na tasklet for the
+ * element `arguments` points to, choosing the target pool round-robin. */
+void prevector_scal(void *arguments)
+{
+    /* One atomic ticket per call: both children still go to the same pool,
+     * and concurrent callers cannot corrupt the counter. */
+    int slot = pool_for_task++ % num_pools;
+
+    ABT_task_create(g_pools[slot], vector_scal, arguments, NULL);
+    ABT_task_create(g_pools[slot], na, arguments, NULL);
+}
+
+/* Microseconds elapsed between two gettimeofday() samples.  The fields are
+ * subtracted before scaling so the intermediate values stay small. */
+static double elapsed_usec(const struct timeval *from,
+                           const struct timeval *to)
+{
+    return (to->tv_sec - from->tv_sec) * 1000000.0
+           + (to->tv_usec - from->tv_usec);
+}
+
+/* Yield the calling ULT until one complete pass over pools[0..count) finds
+ * every pool empty.  A single pass is not enough here: a running
+ * prevector_scal can push work into a pool that was already checked.
+ * NOTE(review): an empty pool does not prove that the last popped task has
+ * finished; the caller joins the ESs before it trusts the results. */
+static void wait_for_empty_pools(const ABT_pool *pools, int count)
+{
+    int busy;
+
+    do {
+        int p;
+
+        busy = 0;
+        for (p = 0; p < count; p++) {
+            size_t size = 0;
+            ABT_pool_get_size(pools[p], &size);
+            if (size != 0) {
+                busy = 1;
+                break;
+            }
+        }
+        if (busy) {
+            ABT_thread_yield();
+        }
+    } while (busy);
+}
+
+/*
+ * Usage: <prog> [num_xstreams [ntasks [num_pools]]]
+ *
+ * The main ES creates `ntasks` first-level tasklets, spread over the ESs;
+ * each of them spawns a scaling tasklet and a counting tasklet.  The program
+ * prints the total and the creation time and the value of the counter.
+ */
+int main(int argc, char *argv[])
+{
+    int i, j;
+    int ntasks;
+    ABT_xstream *xstreams;
+    struct timeval start, end, end2;
+    float *a;
+
+    num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS;
+    if (num_xstreams < 1) {
+        num_xstreams = 1;
+    }
+    ntasks = argc > 2 ? (int)strtoll(argv[2], NULL, 10) : NUM_TASKS;
+    if (ntasks < num_xstreams) {
+        ntasks = num_xstreams;
+    }
+    num_pools = argc > 3 ? atoi(argv[3]) : num_xstreams;
+    /* prevector_scal spreads work over every pool, so each pool needs an ES
+     * that serves it: keep 1 <= num_pools <= num_xstreams. */
+    if (num_pools < 1) {
+        num_pools = 1;
+    } else if (num_pools > num_xstreams) {
+        num_pools = num_xstreams;
+    }
+    printf("# of ESs: %d Pools: %d\n", num_xstreams, num_pools);
+
+    a = malloc(sizeof(float) * ntasks);
+    xstreams = malloc(sizeof(ABT_xstream) * num_xstreams);
+    g_pools = malloc(sizeof(ABT_pool) * num_pools);
+    if (a == NULL || xstreams == NULL || g_pools == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    /* shared pool creation */
+    for (i = 0; i < num_pools; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &g_pools[i]);
+    }
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &g_pools[0]);
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i % num_pools],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+    /* Work here */
+    gettimeofday(&start, NULL);
+    for (j = 0; j < ntasks; j++) {
+        ABT_task_create_on_xstream(xstreams[j % num_xstreams], prevector_scal,
+                                   (void *)&a[j], NULL);
+    }
+
+    gettimeofday(&end2, NULL);
+    wait_for_empty_pools(g_pools, num_pools);
+
+    gettimeofday(&end, NULL);
+    double time = elapsed_usec(&start, &end);
+    double time2 = elapsed_usec(&start, &end2);
+
+    /* join ESs before reading `o`, so no tasklet is still running */
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    printf("nxstreams: %d\nntasks %d\nTotal Time(s): %f\n Creation Time (s): %f\n",
+           num_xstreams, ntasks, time / 1000000.0, time2 / 1000000.0);
+    printf("o=%d and it should be %d\n", o, ntasks);
+
+    ABT_finalize();
+
+    free(a);
+    free(g_pools);
+    free(xstreams);
+
+    return EXIT_SUCCESS;
+}
diff --git 
a/bolt/examples/argobots/task_nested_lvl2_abt_task.c b/bolt/examples/argobots/task_nested_lvl2_abt_task.c
new file mode 100644
index 0000000000000..91651eaa1cecf
--- /dev/null
+++ b/bolt/examples/argobots/task_nested_lvl2_abt_task.c
@@ -0,0 +1,154 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/* This code creates all first-level tasks from the main ES, using as many
+ * pools as xstreams; the tasks are executed by all the xstreams.  Every
+ * first-level task spawns two second-level tasks, and each of those spawns one
+ * scaling task and one counting task.  It mimics a one-producer,
+ * all-consumers system.
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 50000
+#define NUM_XSTREAMS 4
+#define NUM_REPS 1
+#define LEVELS 2
+
+int num_pools;
+int num_xstreams;
+/* Incremented by tasklets running on several ESs at the same time. */
+_Atomic int o = 0;
+ABT_pool *g_pools;
+/* Parsed from argv[4] but not used in this file: the nesting depth is fixed
+ * at two by prevector_scal/prevector_scal2. */
+int lvl;
+
+/* Leaf tasklet: scale the float that `arguments` points to by 0.9 in place.
+ * NOTE(review): when several ESs share one pool, the two scalings of one
+ * element may run concurrently - confirm whether that setup must be
+ * supported. */
+void vector_scal(void *arguments)
+{
+    float *a;
+    a = (float *)arguments;
+    *a = *a * 0.9f;
+}
+
+/* Leaf tasklet: count one execution in `o`; `arguments` is ignored. */
+void na(void *arguments)
+{
+    (void)arguments;
+    o++;
+}
+
+/* Second-level tasklet: spawn one vector_scal and one na tasklet into the
+ * pool served by the calling ES. */
+void prevector_scal2(void *arguments)
+{
+    int rank;
+    ABT_xstream_self_rank(&rank);
+    /* ES i serves pool i % num_pools; there may be fewer pools than ESs */
+    ABT_task_create(g_pools[rank % num_pools], vector_scal, arguments, NULL);
+    ABT_task_create(g_pools[rank % num_pools], na, arguments, NULL);
+}
+
+/* First-level tasklet: spawn two prevector_scal2 tasklets into the pool
+ * served by the calling ES. */
+void prevector_scal(void *arguments)
+{
+    int rank;
+    ABT_xstream_self_rank(&rank);
+    ABT_task_create(g_pools[rank % num_pools], prevector_scal2, arguments,
+                    NULL);
+    ABT_task_create(g_pools[rank % num_pools], prevector_scal2, arguments,
+                    NULL);
+}
+
+/* Microseconds elapsed between two gettimeofday() samples.  The fields are
+ * subtracted before scaling so the intermediate values stay small. */
+static double elapsed_usec(const struct timeval *from,
+                           const struct timeval *to)
+{
+    return (to->tv_sec - from->tv_sec) * 1000000.0
+           + (to->tv_usec - from->tv_usec);
+}
+
+/* Yield the calling ULT until one complete pass over pools[0..count) finds
+ * every pool empty; running tasks keep pushing new work into the pools. */
+static void wait_for_empty_pools(const ABT_pool *pools, int count)
+{
+    int busy;
+
+    do {
+        int p;
+
+        busy = 0;
+        for (p = 0; p < count; p++) {
+            size_t size = 0;
+            ABT_pool_get_size(pools[p], &size);
+            if (size != 0) {
+                busy = 1;
+                break;
+            }
+        }
+        if (busy) {
+            ABT_thread_yield();
+        }
+    } while (busy);
+}
+
+/*
+ * Usage: <prog> [num_xstreams [ntasks [num_pools [levels]]]]
+ */
+int main(int argc, char *argv[])
+{
+    int i, j;
+    int ntasks;
+    ABT_xstream *xstreams;
+    struct timeval start, end, end2;
+    float *a;
+
+    num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS;
+    if (num_xstreams < 1) {
+        num_xstreams = 1;
+    }
+    ntasks = argc > 2 ? (int)strtoll(argv[2], NULL, 10) : NUM_TASKS;
+    if (ntasks < num_xstreams) {
+        ntasks = num_xstreams;
+    }
+    num_pools = argc > 3 ? atoi(argv[3]) : num_xstreams;
+    if (num_pools < 1) {
+        num_pools = 1;
+    }
+    lvl = (argc > 4) ? atoi(argv[4]) : LEVELS;
+
+    a = malloc(sizeof(float) * ntasks);
+    xstreams = malloc(sizeof(ABT_xstream) * num_xstreams);
+    g_pools = malloc(sizeof(ABT_pool) * num_pools);
+    if (a == NULL || xstreams == NULL || g_pools == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    /* shared pool creation */
+    for (i = 0; i < num_pools; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &g_pools[i]);
+    }
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &g_pools[0]);
+
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &g_pools[i % num_pools],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+    /* Work here */
+    gettimeofday(&start, NULL);
+    for (j = 0; j < ntasks; j++) {
+        ABT_task_create_on_xstream(xstreams[j % num_xstreams], prevector_scal,
+                                   (void *)&a[j], NULL);
+    }
+
+    gettimeofday(&end2, NULL);
+    wait_for_empty_pools(g_pools, num_pools);
+
+    gettimeofday(&end, NULL);
+    double time = elapsed_usec(&start, &end);
+    double time2 = elapsed_usec(&start, &end2);
+
+    /* join ESs before reading `o` and `a`, so no tasklet is still running */
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    printf("nxstreams: %d\nntasks %d\nTime(s): %f\n",
+           num_xstreams, ntasks, time / 1000000.0);
+    /* two prevector_scal2 per first-level task, one na each */
+    printf("o=%d ans it should be %d\n", o, 2 * ntasks);
+    printf("Creation time= %f\n", time2 / 1000000.0);
+
+    for (i = 0; i < ntasks; i++) {
+        /* two prevector_scal2 per first-level task, one vector_scal each:
+         * every element is scaled twice */
+        float expected = i + 100.0f;
+        expected *= 0.9f;
+        expected *= 0.9f;
+        if (a[i] != expected) {
+            printf("error: a[%d]\n", i);
+        }
+    }
+    ABT_finalize();
+
+    free(a);
+    free(g_pools);
+    free(xstreams);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/task_nested_lvl2_omp.c b/bolt/examples/argobots/task_nested_lvl2_omp.c
new file mode 100644
index 0000000000000..8d9b1dcfe9dcc
--- /dev/null
+++ b/bolt/examples/argobots/task_nested_lvl2_omp.c
@@ -0,0 +1,121 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 50000
+#define NUM_REPS 1
+#define LEVELS 2
+
+/* Number of na() executions; updated from concurrently running tasks. */
+int o = 0;
+
+/* Scale *a by `value` in place.  With lvl > 1 several leaf tasks scale the
+ * same element, so the read-modify-write has to be atomic. */
+void sscal(float value, float *a)
+{
+    #pragma omp atomic
+    *a *= value;
+}
+
+/* Count one execution in `o`; `value` is ignored. */
+void na(float value)
+{
+    (void)value;
+    #pragma omp atomic
+    o++;
+}
+
+/* Spawn a binary tree of tasks `lvl` levels deep.  Inner levels spawn two
+ * presscal tasks with lvl - 1; the last level spawns one sscal task on `a`
+ * and one na task.  `i` is only passed along. */
+void presscal(float value, float *a, int lvl, int i)
+{
+    if (lvl > 1) {
+        lvl--;
+        #pragma omp task
+        {
+            presscal(value, a, lvl, i);
+        }
+        #pragma omp task
+        {
+            presscal(value, a, lvl, i);
+        }
+    }
+    else {
+        #pragma omp task
+        {
+            sscal(value, a);
+        }
+
+        #pragma omp task
+        {
+            na(value);
+        }
+    }
+}
+
+/*
+ * Usage: <prog> [ntasks [levels [repetitions]]]
+ * Output: average time per repetition and the task creation time of the last
+ * repetition.
+ */
+int main(int argc, char *argv[])
+{
+    int i, r, nthreads;
+    double *time, avg_time = 0.0;
+    float *a;
+    double time2 = 0.0;
+    long leaves = 1;
+    long scalings, s;
+
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            nthreads = omp_get_num_threads();
+        }
+    }
+
+    int ntasks = argc > 1 ? (int)strtoll(argv[1], NULL, 10) : NUM_TASKS;
+    if (ntasks < 1) {
+        ntasks = 1;
+    }
+
+    int lvl = (argc > 2) ? atoi(argv[2]) : LEVELS;
+
+    int rep = (argc > 3) ? atoi(argv[3]) : NUM_REPS;
+    if (rep < 1) {
+        rep = 1; /* avg_time is divided by rep below */
+    }
+
+    /* presscal doubles the number of tasks at every level above the first,
+     * so one top-level task ends in 2^(lvl-1) sscal and na tasks */
+    for (i = 1; i < lvl; i++) {
+        leaves *= 2;
+    }
+    scalings = leaves * rep;
+
+    time = malloc(sizeof(double) * rep);
+    a = malloc(sizeof(float) * ntasks);
+    if (time == NULL || a == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    for (r = 0; r < rep; r++) {
+        time[r] = omp_get_wtime();
+        #pragma omp parallel
+        {
+            #pragma omp single
+            {
+                time2 = omp_get_wtime();
+                for (i = 0; i < ntasks; i++) {
+                    #pragma omp task firstprivate(i)
+                    {
+                        presscal(0.9f, &a[i], lvl, i);
+                    }
+                }
+                time2 = omp_get_wtime() - time2;
+            }
+        }
+        time[r] = omp_get_wtime() - time[r];
+        avg_time += time[r];
+
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        /* replay the same float multiplications the tasks performed */
+        float expected = i + 100.0f;
+        for (s = 0; s < scalings; s++) {
+            expected *= 0.9f;
+        }
+        if (a[i] != expected) {
+            printf("error: a[%d]=%.2f expected %.2f\n", i,
+                   a[i], expected);
+        }
+    }
+    avg_time /= rep;
+    printf("nthreads: %d\nntasks: %d\nTime(s):%f\nCreation Time: %f\n",
+           nthreads, ntasks, avg_time, time2);
+    printf("o=%d deberia valer %d\n", o, (int)(ntasks * scalings));
+
+    free(a);
+    free(time);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/task_nested_omp.c b/bolt/examples/argobots/task_nested_omp.c
new file mode 100644
index 0000000000000..0a88b11ddf7ad
--- /dev/null
+++ b/bolt/examples/argobots/task_nested_omp.c
@@ -0,0 +1,108 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 50000
+#define NUM_REPS 1
+
+/* Number of na() executions; updated from concurrently running tasks. */
+int o = 0;
+
+/* Scale *a by `value` in place. */
+void sscal(float value, float *a)
+{
+    *a = *a * value;
+}
+
+/* Count one execution in `o`; `value` is ignored. */
+void na(float value)
+{
+    (void)value;
+    #pragma omp atomic
+    o++;
+}
+
+/* Spawn one sscal task on `a` and one na task. */
+void presscal(float value, float *a)
+{
+    #pragma omp task
+    {
+        sscal(value, a);
+    }
+
+    #pragma omp task
+    {
+        na(value);
+    }
+}
+
+/*
+ * Usage: <prog> [ntasks [repetitions]]
+ * Output: average time per repetition and the task creation time of the last
+ * repetition.
+ */
+int main(int argc, char *argv[])
+{
+    int i, r, nthreads;
+    double *time, avg_time = 0.0;
+    float *a;
+    double time2 = 0.0;
+
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            nthreads = omp_get_num_threads();
+        }
+    }
+
+    int ntasks = argc > 1 ? (int)strtoll(argv[1], NULL, 10) : NUM_TASKS;
+    if (ntasks < nthreads)
+        ntasks = nthreads;
+
+    int rep = (argc > 2) ? atoi(argv[2]) : NUM_REPS;
+    if (rep < 1)
+        rep = 1; /* avg_time is divided by rep below */
+
+    time = malloc(sizeof(double) * rep);
+    a = malloc(sizeof(float) * ntasks);
+    if (time == NULL || a == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    for (r = 0; r < rep; r++) {
+        time[r] = omp_get_wtime();
+        #pragma omp parallel
+        {
+            #pragma omp single
+            {
+                time2 = omp_get_wtime();
+                for (i = 0; i < ntasks; i++) {
+                    #pragma omp task firstprivate(i)
+                    {
+                        presscal(0.9f, &a[i]);
+                    }
+                }
+                time2 = omp_get_wtime() - time2;
+            }
+        }
+        time[r] = omp_get_wtime() - time[r];
+        avg_time += time[r];
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        /* every repetition scales each element once more */
+        float expected = i + 100.0f;
+        for (r = 0; r < rep; r++) {
+            expected *= 0.9f;
+        }
+        if (a[i] != expected) {
+            printf("error: a[%d]=%.2f expected %.2f\n", i,
+                   a[i], expected);
+        }
+    }
+    avg_time /= rep;
+    printf("nthreads: %d\nntasks: %d\nTime(s):%f\nCreation Time: %f\n",
+           nthreads, ntasks, avg_time, time2);
+    printf("o=%d deberia valer %d\n", o, ntasks * rep);
+
+    free(a);
+    free(time);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/task_single_producer_abt_task.c b/bolt/examples/argobots/task_single_producer_abt_task.c
new file mode 100644
index 0000000000000..f326108c51368
--- /dev/null
+++ b/bolt/examples/argobots/task_single_producer_abt_task.c
@@ -0,0 +1,119 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/* This code creates all tasks from the main ES but using as many pools as
+ * xstreams and they are executed by all the xstreams. 
It mimics one producer
+ * all consumers system
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 5000000
+#define NUM_XSTREAMS 4
+#define NUM_REPS 1
+
+/* Tasklet body: scale the float that `arguments` points to by 0.9 in place. */
+void vector_scal(void *arguments)
+{
+    float *a;
+    a = (float *)arguments;
+    *a = *a * 0.9f;
+}
+
+/* Microseconds elapsed between two gettimeofday() samples.  The fields are
+ * subtracted before scaling so the intermediate values stay small. */
+static double elapsed_usec(const struct timeval *from,
+                           const struct timeval *to)
+{
+    return (to->tv_sec - from->tv_sec) * 1000000.0
+           + (to->tv_usec - from->tv_usec);
+}
+
+/* Yield the calling ULT until every pool in pools[0..count) is empty. */
+static void wait_for_empty_pools(const ABT_pool *pools, int count)
+{
+    int busy;
+
+    do {
+        int p;
+
+        busy = 0;
+        for (p = 0; p < count; p++) {
+            size_t size = 0;
+            ABT_pool_get_size(pools[p], &size);
+            if (size != 0) {
+                busy = 1;
+                break;
+            }
+        }
+        if (busy) {
+            ABT_thread_yield();
+        }
+    } while (busy);
+}
+
+/*
+ * Usage: <prog> [num_xstreams [ntasks]]; the pool count is read from the
+ * fifth argument when present.
+ */
+int main(int argc, char *argv[])
+{
+    int i, j;
+    int ntasks;
+    int num_xstreams;
+    int num_pools;
+    ABT_xstream *xstreams;
+    ABT_pool *pools;
+    struct timeval start, end;
+    float *a;
+
+    num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS;
+    if (num_xstreams < 1) {
+        num_xstreams = 1;
+    }
+    ntasks = argc > 2 ? (int)strtoll(argv[2], NULL, 10) : NUM_TASKS;
+    if (ntasks < num_xstreams) {
+        ntasks = num_xstreams;
+    }
+    num_pools = argc > 5 ? atoi(argv[5]) : num_xstreams;
+    /* Tasks are dealt to pools[j % num_pools], so the count must be positive
+     * and every pool needs an ES that serves it. */
+    if (num_pools < 1) {
+        num_pools = 1;
+    } else if (num_pools > num_xstreams) {
+        num_pools = num_xstreams;
+    }
+
+    a = malloc(sizeof(float) * ntasks);
+    xstreams = malloc(sizeof(ABT_xstream) * num_xstreams);
+    pools = malloc(sizeof(ABT_pool) * num_pools);
+    if (a == NULL || xstreams == NULL || pools == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    /* shared pool creation */
+    for (i = 0; i < num_pools; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &pools[i]);
+    }
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &pools[0]);
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &pools[i % num_pools],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+    /* Work here */
+    gettimeofday(&start, NULL);
+    for (j = 0; j < ntasks; j++) {
+        ABT_task_create(pools[j % num_pools], vector_scal, (void *)&a[j], NULL);
+    }
+
+    gettimeofday(&end, NULL);
+    double time2 = elapsed_usec(&start, &end);
+    wait_for_empty_pools(pools, num_pools);
+
+    gettimeofday(&end, NULL);
+    double time = elapsed_usec(&start, &end);
+
+    /* join ESs before checking `a`, so no tasklet is still running */
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    printf("nxstreams: %d\nntasks %d\nTime(s): %f Creation Time(s): %f\n",
+           num_xstreams, ntasks, time / 1000000.0, time2 / 1000000.0);
+
+    for (i = 0; i < ntasks; i++) {
+        if (a[i] != (i + 100.0f) * 0.9f) {
+            printf("error: a[%d]\n", i);
+        }
+    }
+
+    ABT_finalize();
+
+    free(a);
+    free(pools);
+    free(xstreams);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/task_single_producer_abt_thread.c b/bolt/examples/argobots/task_single_producer_abt_thread.c
new file mode 100644
index 0000000000000..e2d44dc652ad5
--- /dev/null
+++ b/bolt/examples/argobots/task_single_producer_abt_thread.c
@@ -0,0 +1,118 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/* This code creates all ULTs from the main ES but using as many pools as
+ * xstreams and they are executed by all the xstreams. It mimics a
+ * one-producer, all-consumers system.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 5000000
+#define NUM_XSTREAMS 4
+#define NUM_REPS 1
+
+/* ULT body: scale the float that `arguments` points to by 0.9 in place. */
+void vector_scal(void *arguments)
+{
+    float *a;
+    a = (float *)arguments;
+    *a = *a * 0.9f;
+}
+
+/* Microseconds elapsed between two gettimeofday() samples.  The fields are
+ * subtracted before scaling so the intermediate values stay small. */
+static double elapsed_usec(const struct timeval *from,
+                           const struct timeval *to)
+{
+    return (to->tv_sec - from->tv_sec) * 1000000.0
+           + (to->tv_usec - from->tv_usec);
+}
+
+/* Yield the calling ULT until every pool in pools[0..count) is empty. */
+static void wait_for_empty_pools(const ABT_pool *pools, int count)
+{
+    int busy;
+
+    do {
+        int p;
+
+        busy = 0;
+        for (p = 0; p < count; p++) {
+            size_t size = 0;
+            ABT_pool_get_size(pools[p], &size);
+            if (size != 0) {
+                busy = 1;
+                break;
+            }
+        }
+        if (busy) {
+            ABT_thread_yield();
+        }
+    } while (busy);
+}
+
+/*
+ * Usage: <prog> [num_xstreams [ntasks]]; the pool count is read from the
+ * fifth argument when present.
+ */
+int main(int argc, char *argv[])
+{
+    int i, j;
+    int ntasks;
+    int num_xstreams;
+    int num_pools;
+    ABT_xstream *xstreams;
+    ABT_pool *pools;
+    struct timeval start, end;
+    float *a;
+
+    num_xstreams = argc > 1 ? atoi(argv[1]) : NUM_XSTREAMS;
+    if (num_xstreams < 1) {
+        num_xstreams = 1;
+    }
+    ntasks = argc > 2 ? (int)strtoll(argv[2], NULL, 10) : NUM_TASKS;
+    if (ntasks < num_xstreams) {
+        ntasks = num_xstreams;
+    }
+    num_pools = argc > 5 ? atoi(argv[5]) : num_xstreams;
+    /* ULTs are dealt to pools[j % num_pools], so the count must be positive
+     * and every pool needs an ES that serves it. */
+    if (num_pools < 1) {
+        num_pools = 1;
+    } else if (num_pools > num_xstreams) {
+        num_pools = num_xstreams;
+    }
+    printf("# of ESs: %d Pools: %d\n", num_xstreams, num_pools);
+
+    a = malloc(sizeof(float) * ntasks);
+    xstreams = malloc(sizeof(ABT_xstream) * num_xstreams);
+    pools = malloc(sizeof(ABT_pool) * num_pools);
+    if (a == NULL || xstreams == NULL || pools == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    /* initialization */
+    ABT_init(argc, argv);
+
+    /* shared pool creation */
+    for (i = 0; i < num_pools; i++) {
+        ABT_pool_create_basic(ABT_POOL_FIFO, ABT_POOL_ACCESS_MPMC, ABT_TRUE,
+                              &pools[i]);
+    }
+    /* ES creation */
+    ABT_xstream_self(&xstreams[0]);
+    ABT_xstream_set_main_sched_basic(xstreams[0], ABT_SCHED_DEFAULT,
+                                     1, &pools[0]);
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_create_basic(ABT_SCHED_DEFAULT, 1, &pools[i % num_pools],
+                                 ABT_SCHED_CONFIG_NULL, &xstreams[i]);
+        ABT_xstream_start(xstreams[i]);
+    }
+    /* Work here */
+    gettimeofday(&start, NULL);
+    for (j = 0; j < ntasks; j++) {
+        ABT_thread_create(pools[j % num_pools], vector_scal,
+                          (void *)&a[j], ABT_THREAD_ATTR_NULL, NULL);
+    }
+
+    wait_for_empty_pools(pools, num_pools);
+
+    gettimeofday(&end, NULL);
+    double time = elapsed_usec(&start, &end);
+
+    /* join ESs before checking `a`, so no ULT is still running */
+    for (i = 1; i < num_xstreams; i++) {
+        ABT_xstream_join(xstreams[i]);
+        ABT_xstream_free(&xstreams[i]);
+    }
+
+    printf("nxstreams: %d\nntasks %d\nTime(s): %f\n",
+           num_xstreams, ntasks, time / 1000000.0);
+
+    for (i = 0; i < ntasks; i++) {
+        if (a[i] != (i + 100.0f) * 0.9f) {
+            printf("error: a[%d]\n", i);
+        }
+    }
+
+    ABT_finalize();
+
+    free(a);
+    free(pools);
+    free(xstreams);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/task_single_producer_omp.c b/bolt/examples/argobots/task_single_producer_omp.c
new file mode 100644
index 0000000000000..7a64a75c480ad
--- /dev/null
+++ 
b/bolt/examples/argobots/task_single_producer_omp.c
@@ -0,0 +1,90 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 5000000
+#define NUM_REPS 1
+
+/* Scale *a by `value` in place. */
+void sscal(float value, float *a)
+{
+    *a = *a * value;
+}
+
+/*
+ * Usage: <prog> [ntasks [repetitions]]
+ * One thread creates `ntasks` tasks, each of which scales one element.
+ * Output: one line per task, then the average time per repetition and the
+ * task creation time of the last repetition.
+ */
+int main(int argc, char *argv[])
+{
+    int i, r, nthreads;
+    double *time, avg_time = 0.0;
+    float *a;
+    double time2 = 0.0;
+
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            nthreads = omp_get_num_threads();
+        }
+    }
+
+    int ntasks = argc > 1 ? (int)strtoll(argv[1], NULL, 10) : NUM_TASKS;
+    if (ntasks < nthreads)
+        ntasks = nthreads;
+
+    int rep = (argc > 2) ? atoi(argv[2]) : NUM_REPS;
+    if (rep < 1)
+        rep = 1; /* avg_time is divided by rep below */
+
+    time = malloc(sizeof(double) * rep);
+    a = malloc(sizeof(float) * ntasks);
+    if (time == NULL || a == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    for (r = 0; r < rep; r++) {
+        time[r] = omp_get_wtime();
+        #pragma omp parallel
+        {
+            #pragma omp single
+            {
+                /* NOTE(review): the sleep and the per-task printf run inside
+                 * the timed region - confirm they are intended */
+                sleep(2);
+                printf("Thread %d\n", omp_get_thread_num());
+                time2 = omp_get_wtime();
+                for (i = 0; i < ntasks; i++) {
+                    #pragma omp task firstprivate(i)
+                    {
+                        printf("Task %d executed by Thread %d Stolen? %s\n",
+                               i, omp_get_thread_num(),
+                               (i % nthreads == omp_get_thread_num())
+                               ? "NO" : "YES");
+                        sscal(0.9f, &a[i]);
+                    }
+                }
+                time2 = omp_get_wtime() - time2;
+            }
+        }
+        time[r] = omp_get_wtime() - time[r];
+        avg_time += time[r];
+
+    }
+    for (i = 0; i < ntasks; i++) {
+        /* every repetition scales each element once more */
+        float expected = i + 100.0f;
+        for (r = 0; r < rep; r++) {
+            expected *= 0.9f;
+        }
+        if (a[i] != expected) {
+            printf("error: a[%d]=%.2f expected %.2f\n", i,
+                   a[i], expected);
+        }
+    }
+    avg_time /= rep;
+    printf("nthreads: %d\nntasks: %d\nTime(s):%f\nCreation Time: %f\n",
+           nthreads, ntasks, avg_time, time2);
+
+    free(a);
+    free(time);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/taskwait_omp.c b/bolt/examples/argobots/taskwait_omp.c
new file mode 100644
index 0000000000000..9863839da190e
--- /dev/null
+++ b/bolt/examples/argobots/taskwait_omp.c
@@ -0,0 +1,117 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/*
+ * A bunch of n tasks (1st arg) are created by a single thread.
+ * Each task creates two more tasks and then executes a taskwait directive.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 50000
+#define NUM_REPS 1
+
+/* Number of na() executions; updated from concurrently running tasks. */
+int o = 0;
+/* NOTE(review): nothing in this file writes pp, so the final "pp" line
+ * always reports 0 - confirm what it was meant to count. */
+int pp = 0;
+
+/* Count one execution in `o`; `value` is ignored. */
+void na(float value)
+{
+    (void)value;
+    #pragma omp atomic
+    o++;
+}
+
+/* Scale *a by `value` in place. */
+void sscal(float value, float *a)
+{
+    *a = *a * value;
+}
+
+/* Spawn one sscal task on `a` and one na task, then wait for both. */
+void presscal(float value, float *a)
+{
+    #pragma omp task
+    {
+        sscal(value, a);
+    }
+
+    #pragma omp task
+    {
+        na(value);
+    }
+
+    #pragma omp taskwait
+}
+
+/*
+ * Usage: <prog> [ntasks [repetitions]]
+ * Output: average time per repetition, the task creation time of the last
+ * repetition, and the counters.
+ */
+int main(int argc, char *argv[])
+{
+    int i, r, nthreads;
+    double *time, avg_time = 0.0;
+    float *a;
+    double time2 = 0.0;
+
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            nthreads = omp_get_num_threads();
+        }
+    }
+
+    int ntasks = argc > 1 ? (int)strtoll(argv[1], NULL, 10) : NUM_TASKS;
+    if (ntasks < nthreads)
+        ntasks = nthreads;
+
+    int rep = (argc > 2) ? atoi(argv[2]) : NUM_REPS;
+    if (rep < 1)
+        rep = 1; /* avg_time is divided by rep below */
+
+    time = malloc(sizeof(double) * rep);
+
+    a = malloc(sizeof(float) * ntasks);
+    if (time == NULL || a == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    for (r = 0; r < rep; r++) {
+        time[r] = omp_get_wtime();
+        #pragma omp parallel
+        {
+            #pragma omp single
+            {
+                time2 = omp_get_wtime();
+                for (i = 0; i < ntasks; i++) {
+                    #pragma omp task firstprivate(i)
+                    {
+                        presscal(0.9f, &a[i]);
+                    }
+                }
+                time2 = omp_get_wtime() - time2;
+            }
+        }
+        time[r] = omp_get_wtime() - time[r];
+        avg_time += time[r];
+
+    }
+    for (i = 0; i < ntasks; i++) {
+        /* every repetition scales each element once more */
+        float expected = i + 100.0f;
+        for (r = 0; r < rep; r++) {
+            expected *= 0.9f;
+        }
+        if (a[i] != expected) {
+            printf("error: a[%d]=%.2f expected %.2f\n", i,
+                   a[i], expected);
+        }
+    }
+    avg_time /= rep;
+
+    printf("nthreads: %d\nntasks: %d\nTime(s):%f\nCreation Time: %f\n",
+           nthreads, ntasks, avg_time, time2);
+    printf("o=%d and it should be %d\n", o, ntasks * rep);
+    printf("pp=%d and it should be %d\n", pp, ntasks);
+
+    free(a);
+    free(time);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/argobots/taskyield_omp.c b/bolt/examples/argobots/taskyield_omp.c
new file mode 100644
index 0000000000000..7436453a67459
--- /dev/null
+++ b/bolt/examples/argobots/taskyield_omp.c
@@ -0,0 +1,115 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
+
+/*
+ * See LICENSE.txt in top-level directory.
+ */
+
+/*
+ * A bunch of n tasks (1st arg) are created by a single thread.
+ * Each task creates two tasks more and inside the second one
+ * a taskyield directive is called
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#define NUM_TASKS 50000
+#define NUM_REPS 1
+
+/* Number of na() executions; updated from concurrently running tasks. */
+int o = 0;
+/* NOTE(review): nothing in this file writes pp, so the final "pp" line
+ * always reports 0 - confirm what it was meant to count. */
+int pp = 0;
+
+/* Count one execution in `o`; `value` is ignored. */
+void na(float value)
+{
+    (void)value;
+    #pragma omp atomic
+    o++;
+}
+
+/* Scale *a by `value` in place. */
+void sscal(float value, float *a)
+{
+    *a = *a * value;
+}
+
+/* Spawn one sscal task on `a` and one task that calls na and then reaches a
+ * taskyield directive. */
+void presscal(float value, float *a)
+{
+    #pragma omp task
+    {
+        sscal(value, a);
+    }
+
+    #pragma omp task
+    {
+        na(value);
+        #pragma omp taskyield
+    }
+}
+
+/*
+ * Usage: <prog> [ntasks [repetitions]]
+ * Output: average time per repetition, the task creation time of the last
+ * repetition, and the counters.
+ */
+int main(int argc, char *argv[])
+{
+    int i, r, nthreads;
+    double *time, avg_time = 0.0;
+    float *a;
+    double time2 = 0.0;
+
+    #pragma omp parallel
+    {
+        #pragma omp master
+        {
+            nthreads = omp_get_num_threads();
+        }
+    }
+
+    int ntasks = argc > 1 ? (int)strtoll(argv[1], NULL, 10) : NUM_TASKS;
+    if (ntasks < nthreads)
+        ntasks = nthreads;
+
+    int rep = (argc > 2) ? atoi(argv[2]) : NUM_REPS;
+    if (rep < 1)
+        rep = 1; /* avg_time is divided by rep below */
+
+    time = malloc(sizeof(double) * rep);
+    a = malloc(sizeof(float) * ntasks);
+    if (time == NULL || a == NULL) {
+        fprintf(stderr, "out of memory\n");
+        return EXIT_FAILURE;
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        a[i] = i + 100.0f;
+    }
+
+    for (r = 0; r < rep; r++) {
+        time[r] = omp_get_wtime();
+        #pragma omp parallel
+        {
+            #pragma omp single
+            {
+                time2 = omp_get_wtime();
+                for (i = 0; i < ntasks; i++) {
+                    #pragma omp task firstprivate(i)
+                    {
+                        presscal(0.9f, &a[i]);
+                    }
+                }
+                time2 = omp_get_wtime() - time2;
+            }
+        }
+        time[r] = omp_get_wtime() - time[r];
+        avg_time += time[r];
+    }
+
+    for (i = 0; i < ntasks; i++) {
+        /* every repetition scales each element once more */
+        float expected = i + 100.0f;
+        for (r = 0; r < rep; r++) {
+            expected *= 0.9f;
+        }
+        if (a[i] != expected) {
+            printf("error: a[%d]=%.2f expected %.2f\n", i,
+                   a[i], expected);
+        }
+    }
+    avg_time /= rep;
+    printf("nthreads: %d\nntasks: %d\nTime(s):%f\nCreation Time: %f\n",
+           nthreads, ntasks, avg_time, time2);
+    printf("o=%d and it should be %d\n", o, ntasks * rep);
+    printf("pp=%d and it should be %d\n", pp, ntasks);
+
+    free(a);
+    free(time);
+
+    return EXIT_SUCCESS;
+}
diff --git a/bolt/examples/sample_nested.c b/bolt/examples/sample_nested.c
new file mode 100644
index 0000000000000..2feb5be2eb870
--- /dev/null
+++ b/bolt/examples/sample_nested.c @@ -0,0 +1,59 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ + +/* + * See LICENSE.txt in top-level directory. + */ + +#include +#include +#include + + + +int main(int argc, char * argv[]) { + + int size=(argc>1)?atoi(argv[1]):100; + int i,j,k=0; + int nthreads; + struct timeval t_start, t_end; + double time; + + double *a = (double *)malloc(sizeof(double)*size*size); + + #pragma omp parallel + { + nthreads=omp_get_num_threads(); + } + + for(i=0;i +#include +#include + +int main(int argc, char * argv[]) { + + int i,num=(argc>1)?atoi(argv[1]):100; + int nthreads; + struct timeval t_start, t_end; + double time; + double *a = (double *)malloc(sizeof(double)*num); + + #pragma omp parallel + { + nthreads=omp_get_num_threads(); + } + + + for(i=0;i +#include +#include + +int main(int argc, char * argv[]) { + + int i,num=(argc>1)?atoi(argv[1]):100; + int nthreads; + struct timeval t_start, t_end; + double time; + double *a = (double *)malloc(sizeof(double)*num); + + #pragma omp parallel + { + nthreads=omp_get_num_threads(); + } + + + for(i=0;i