diff --git a/CMakeLists.txt b/CMakeLists.txt index c72a192db..b23aa7393 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,9 +191,9 @@ set(SOURCES src/executor.cc src/distributed_graph_generator.cc src/graph_serializer.cc + src/grid.cc src/print_graph.cc - src/print_utils.cc - src/recorders.cc + src/c/corders.cc src/runtime.cc src/scheduler.cc src/task.cc @@ -346,10 +346,6 @@ install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ DESTINATION include/celerity ) -install( - DIRECTORY ${PROJECT_SOURCE_DIR}/vendor/allscale/ - DESTINATION include/celerity/vendor/allscale -) install( FILES ${PROJECT_SOURCE_DIR}/vendor/ctpl_stl.h DESTINATION include/celerity/vendor diff --git a/ci/perf/gpuc2_bench.csv b/ci/perf/gpuc2_bench.csv index 688808a6c..3d6579b5a 100644 --- a/ci/perf/gpuc2_bench.csv +++ b/ci/perf/gpuc2_bench.csv @@ -1,87 +1,142 @@ test case,benchmark name,samples,iterations,estimated,mean,low mean,high mean,std dev,low std dev,high std dev,raw -benchmark intrusive graph dependency handling with N nodes - 1,creating nodes,100,5574,2229600,4.4727,4.4716,4.4738,0.0057,0.0055,0.0059,"4.4679,4.4804,4.4697,4.4681,4.4804,4.4679,4.4788,4.4679,4.4788,4.4679,4.4679,4.4788,4.4661,4.4787,4.4681,4.4787,4.4679,4.4681,4.4787,4.4681,4.4787,4.4679,4.4788,4.4679,4.4679,4.4804,4.4679,4.4788,4.4661,4.4806,4.4697,4.4679,4.4806,4.4661,4.4804,4.4681,4.4804,4.4681,4.4679,4.4787,4.4681,4.4787,4.4679,4.4806,4.4679,4.4679,4.4787,4.4679,4.4806,4.4679,4.4804,4.4681,4.4679,4.4787,4.4679,4.4787,4.4699,4.4787,4.4679,4.4663,4.4787,4.4679,4.4788,4.4679,4.4788,4.4661,4.4679,4.4788,4.4679,4.4787,4.4679,4.4804,4.4699,4.4787,4.4661,4.4681,4.4787,4.4679,4.4788,4.4679,4.4788,4.4661,4.4679,4.4788,4.4679,4.4787,4.4679,4.4787,4.4663,4.4679,4.4787,4.4681,4.4787,4.4679,4.4788,4.4679,4.4679,4.4804,4.4679,4.4788" -benchmark intrusive graph dependency handling with N nodes - 1,creating and adding 
dependencies,100,1022,2350600,22.2850,22.1668,22.6931,1.0017,0.3307,2.2643,"22.0254,22.0254,22.8885,22.9755,22.9472,22.9374,22.9755,22.8493,22.9658,22.9472,22.9364,22.9765,22.8679,22.9765,22.7798,22.9472,22.8679,22.8885,22.9168,31.6321,23.0841,22.0147,22.0254,22.0254,22.0254,22.0352,22.0245,22.0254,22.0254,22.0254,22.0342,22.0245,22.0254,22.0254,22.0254,22.0342,22.0254,22.0254,22.0254,22.0245,22.0342,22.0254,22.0254,22.0254,22.0245,22.0352,22.0254,22.0254,22.0245,22.0245,22.0352,22.0157,22.0352,22.0245,22.0254,22.0352,22.0254,22.0352,22.0245,22.0059,22.0254,22.0352,22.0245,22.0254,22.0254,22.0254,22.0342,22.0147,22.0352,22.0254,22.0254,22.0245,22.0254,22.0352,22.0254,22.0049,22.0245,22.0254,22.0352,22.0254,22.0342,22.0254,22.0254,22.0157,22.0342,22.0342,22.0254,22.0254,22.0254,22.0245,22.0352,22.0157,22.0352,22.0245,22.0245,22.0352,22.0254,22.0352,22.0245,22.0157" -benchmark intrusive graph dependency handling with N nodes - 1,adding and removing dependencies,100,1508,2262000,15.5132,15.5022,15.5258,0.0601,0.0534,0.0681,"15.5584,15.5113,15.5053,15.4582,15.6844,15.5113,15.4589,15.4781,15.5179,15.5975,15.4788,15.6373,15.4523,15.4582,15.4847,15.4523,15.4781,15.4582,15.4788,15.4516,15.4516,15.4788,15.5776,15.5385,15.4582,15.4847,15.4980,15.6247,15.4516,15.6114,15.5046,15.5716,15.5511,15.5252,15.5908,15.4847,15.6313,15.4516,15.4516,15.4854,15.4516,15.4781,15.4523,15.4847,15.4649,15.6180,15.4914,15.5849,15.5318,15.4582,15.4847,15.5053,15.6240,15.4589,15.6174,15.4980,15.5782,15.5378,15.4582,15.4788,15.4914,15.6247,15.4516,15.6048,15.5113,15.4523,15.4781,15.4516,15.4788,15.4781,15.6114,15.4781,15.4582,15.4715,15.5511,15.5650,15.5046,15.6107,15.4589,15.6174,15.4920,15.5776,15.5385,15.4582,15.4715,15.4980,15.6313,15.4582,15.4582,15.4788,15.5710,15.5517,15.4516,15.4847,15.4516,15.4847,15.4582,15.5975,15.5186,15.5511" -benchmark intrusive graph dependency handling with N nodes - 1,checking for 
dependencies,100,13832,1383200,1.6933,1.6926,1.6940,0.0036,0.0034,0.0036,"1.6904,1.6977,1.6983,1.6904,1.6904,1.6977,1.6904,1.6977,1.6904,1.6904,1.6976,1.6904,1.6976,1.6904,1.6911,1.6977,1.6904,1.6977,1.6904,1.6904,1.6976,1.6904,1.6976,1.6904,1.6904,1.6977,1.6904,1.6969,1.6904,1.6904,1.6976,1.6904,1.6976,1.6904,1.6896,1.6977,1.6904,1.6977,1.6904,1.6904,1.6977,1.6904,1.6977,1.6904,1.6896,1.6969,1.6904,1.6976,1.6904,1.6976,1.6897,1.6904,1.6977,1.6904,1.6977,1.6904,1.6904,1.6969,1.6904,1.6976,1.6904,1.6904,1.6977,1.6904,1.6977,1.6904,1.6904,1.6976,1.6904,1.6976,1.6904,1.6904,1.6977,1.6904,1.6977,1.6904,1.6904,1.6976,1.6904,1.6976,1.6904,1.6904,1.6977,1.6904,1.6977,1.6904,1.6904,1.6977,1.6904,1.6977,1.6904,1.6977,1.6904,1.6904,1.6969,1.6904,1.6969,1.6904,1.6904,1.6984" -benchmark intrusive graph dependency handling with N nodes - 10,creating nodes,100,602,2347800,38.9929,38.9357,39.2234,0.5309,0.0599,1.2591,"39.0216,38.8738,39.0216,39.0066,38.8887,39.0050,38.8738,38.8887,39.0066,38.8887,39.0066,38.8721,38.9900,38.8887,39.0066,38.8887,38.8887,39.0066,38.8887,39.0066,38.8721,39.0066,38.8721,38.9900,38.8887,38.8904,39.0050,38.8721,39.0066,38.8887,39.0066,38.8721,39.0066,38.8721,38.8904,39.0050,38.8738,39.0066,38.8721,38.9900,38.8887,39.0066,38.8887,38.8904,39.0050,38.8904,38.9884,38.8904,38.9884,38.8887,38.8721,39.0050,38.8904,38.9884,38.8904,38.9884,38.8738,38.9884,38.8904,38.8721,39.0066,38.8887,38.9884,38.8738,39.0050,38.8904,39.0050,38.8738,38.8887,38.9900,38.8887,38.9900,38.8904,39.0050,38.8904,38.9884,38.8738,38.8887,39.0066,38.8887,39.0066,44.2309,39.3040,38.8887,38.9900,38.8887,38.8738,38.9884,38.8738,39.0050,38.8904,38.9884,38.8904,39.0216,38.8887,38.8721,39.0066,38.8904,38.9884,38.8904" -benchmark intrusive graph dependency handling with N nodes - 10,creating and adding 
dependencies,100,97,2357100,243.3019,243.2037,243.4190,0.5452,0.4513,0.7743,"242.7938,243.0103,246.0000,244.0412,243.2165,244.2474,242.8041,242.4948,243.7216,243.3093,244.3505,243.1134,243.3196,243.5258,242.7010,243.6289,243.5258,243.1031,243.5258,243.3196,243.5258,243.8351,242.8041,243.5258,242.8969,243.4227,243.3196,243.3196,242.3918,243.1031,243.7216,243.4227,244.2474,243.0103,243.3196,243.6289,243.3196,243.8351,242.8969,243.3093,243.5258,243.2165,244.5567,243.0103,243.3196,243.6289,243.0000,243.6186,243.1134,242.4948,243.4227,242.9072,243.8351,243.4124,243.7320,243.2165,242.9072,243.3196,242.8041,243.3093,243.2165,243.4227,243.8351,243.7320,243.7320,243.0103,242.9072,243.3093,242.9072,243.1134,242.8041,243.3196,243.4124,243.2062,244.1443,242.5979,243.3196,243.6289,243.4227,243.3093,242.7938,244.0412,243.3196,242.9072,244.1443,242.8041,243.6289,243.3093,242.8041,243.7320,242.2887,242.4845,242.3814,242.8041,243.6289,242.7010,243.1031,242.4948,242.0825,243.4227" -benchmark intrusive graph dependency handling with N nodes - 10,adding and removing 
dependencies,100,111,2364300,211.6386,211.5114,211.7677,0.6533,0.5265,0.8850,"211.9910,211.6396,214.2523,211.5405,212.2703,211.6306,211.9009,212.0901,210.9099,212.0811,211.4595,211.6306,212.8018,212.8108,212.0901,211.2793,211.7207,212.8108,212.8108,212.0901,211.7207,211.0991,211.5495,211.2703,212.1802,211.5495,211.6306,211.8198,211.3604,211.9099,212.7207,211.1892,210.9099,211.1892,211.8108,210.8288,211.0901,211.8198,211.6396,211.4505,211.0991,211.5405,212.0901,212.1802,211.6396,210.9099,211.5495,211.2703,211.1892,211.5405,210.9099,211.6396,211.3604,211.0991,211.6306,211.4595,211.8198,211.4505,211.2793,212.4505,211.9910,211.8198,211.0991,211.2703,211.7297,211.5405,212.0000,211.0991,211.5405,212.0000,211.0090,211.8108,210.9189,211.0901,212.2703,212.0000,212.5405,212.9910,211.5405,213.1712,212.3604,211.9099,210.9099,211.0991,211.4505,211.2793,211.5405,210.9189,211.0901,210.9099,211.8198,211.9009,211.0090,212.0000,208.7477,211.0901,211.4595,211.0901,211.9099,211.2793" -benchmark intrusive graph dependency handling with N nodes - 10,checking for dependencies,100,1005,2311500,21.4532,21.4190,21.4767,0.1394,0.0969,0.1786,"21.5005,21.5005,21.5711,21.0219,21.0119,21.0219,21.0020,21.0020,21.0119,21.0020,21.0020,21.0219,21.5005,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.4905,21.4905,21.4905,21.5005,21.4905,21.4905,21.4905,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.5005,21.4905,21.4905,21.5005,21.4905,21.4905,21.5005,21.5005,21.5005,21.5005,21.4905,21.5005,21.5005,21.4905,21.5005,21.5005,21.5005,21.5005,21.4905,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.4905,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.4905,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.4905,21.4905,21.5005,21.5005,21.5005,21.4905,21.4905,21.5005,21.4905,21.4905,21.5005,21.5005,21.5005,21.4905,21.4905,21.5005,21.4905,21.4905,21.4905,21.5005" -benchmark intrusive graph dependency handling with N nodes - 
100,creating nodes,100,61,2372900,388.2816,387.6408,390.0908,5.0522,2.1835,10.8009,"386.7541,386.5902,403.1803,386.4262,388.2295,386.5738,389.8689,385.5902,386.2623,388.7213,386.4262,388.3934,386.2623,385.4426,387.7377,386.0984,388.5574,386.5738,388.2131,386.5902,386.2459,386.9180,386.0984,387.2459,386.5902,386.5902,389.0492,387.0820,387.5738,391.5082,386.0984,389.2131,386.4262,387.9016,391.0164,396.7705,432.0820,386.4098,388.2295,389.5574,394.1311,385.6066,393.8197,389.8689,386.4262,387.9016,387.9016,385.9344,388.0656,386.0820,388.8852,385.6066,386.0820,388.0656,386.2623,388.8852,385.7705,387.9016,386.5902,392.8361,388.2295,386.4098,387.7377,386.0984,385.9344,389.5410,387.0820,388.0656,386.4262,386.0984,389.5410,387.0820,388.2295,386.5902,386.4098,387.4098,385.9180,388.0656,386.5902,388.2295,388.0656,393.0000,388.3934,386.4262,387.9016,386.0984,386.4098,387.2295,386.4262,388.7213,388.0656,385.9180,388.5574,386.5902,388.2295,387.9016,386.0984,388.2295,386.4262,388.0656" -benchmark intrusive graph dependency handling with N nodes - 100,creating and adding 
dependencies,100,6,2581800,3975.6917,3966.2283,4007.8017,77.8464,12.3315,173.3988,"3975.3333,3960.3333,4254.1667,3998.8333,3973.6667,3968.6667,3972.0000,3978.6667,3978.6667,3957.0000,3965.3333,3975.3333,3968.6667,3977.0000,3973.8333,3975.3333,3987.0000,3963.8333,3972.0000,3965.3333,3950.3333,3975.3333,3975.5000,3972.0000,3980.3333,3968.8333,3942.0000,3965.3333,3968.6667,3952.0000,3963.6667,3962.0000,3950.3333,3965.5000,3957.0000,4686.6667,3963.6667,3975.5000,3963.6667,3958.6667,3970.3333,3963.6667,3968.6667,3965.3333,3947.0000,3978.6667,3965.3333,3975.3333,3952.0000,3952.0000,3960.3333,3975.3333,3957.1667,3975.3333,3952.0000,3953.6667,3945.3333,3947.0000,3957.0000,3960.3333,3943.6667,3963.6667,3960.5000,3968.6667,3958.6667,3953.6667,3953.6667,3980.3333,3962.1667,3962.0000,3952.0000,3958.6667,3957.0000,3957.0000,3973.8333,3980.3333,3935.3333,3972.0000,3957.0000,3970.3333,3962.0000,3953.6667,3948.6667,3983.6667,3975.3333,3965.3333,3967.0000,3988.6667,3968.8333,3965.3333,3967.0000,3978.6667,3957.0000,3973.6667,3958.6667,3992.0000,3957.1667,3962.0000,3980.3333,3983.8333" -benchmark intrusive graph dependency handling with N nodes - 100,adding and removing 
dependencies,100,6,2680800,4731.4067,4728.2467,4734.3150,15.4597,13.6544,17.8326,"4753.5000,4725.1667,4761.8333,4738.5000,4716.8333,4716.6667,4736.6667,4736.6667,4725.0000,4735.1667,4731.8333,4743.5000,4726.8333,4733.5000,4741.8333,4726.8333,4750.1667,4735.1667,4746.8333,4711.8333,4735.1667,4720.1667,4733.5000,4713.3333,4748.6667,4730.0000,4755.1667,4738.5000,4740.1667,4711.6667,4757.0000,4735.1667,4755.1667,4740.1667,4748.5000,4718.5000,4745.1667,4733.5000,4736.8333,4733.5000,4746.8333,4688.3333,4755.1667,4731.6667,4750.3333,4691.6667,4728.5000,4728.5000,4716.8333,4701.6667,4716.8333,4746.8333,4723.5000,4740.1667,4733.5000,4755.1667,4736.8333,4713.5000,4730.1667,4718.3333,4718.3333,4746.8333,4740.1667,4735.1667,4716.8333,4733.5000,4713.5000,4741.8333,4731.8333,4735.1667,4716.6667,4738.3333,4741.8333,4748.5000,4741.8333,4746.8333,4703.3333,4710.1667,4718.5000,4741.8333,4698.3333,4753.5000,4736.8333,4746.8333,4726.8333,4735.1667,4708.3333,4725.1667,4721.8333,4715.1667,4703.3333,4733.5000,4735.1667,4698.5000,4716.8333,4733.5000,4741.8333,4718.3333,4748.6667,4743.5000" -benchmark intrusive graph dependency handling with N nodes - 100,checking for 
dependencies,100,13,2514200,1939.4492,1938.7523,1940.1885,3.6771,3.2705,4.2161,"1942.6923,1936.5385,1947.3077,1945.7692,1941.9231,1945.0000,1941.9231,1940.3846,1949.6154,1943.4615,1948.0769,1939.6154,1942.6923,1940.3846,1935.7692,1941.9231,1945.0000,1946.5385,1942.6923,1942.6923,1931.9231,1939.5385,1936.4615,1935.7692,1940.3846,1935.0000,1939.6154,1938.8462,1941.1538,1937.2308,1935.7692,1941.1538,1936.5385,1941.9231,1936.5385,1943.4615,1937.3077,1936.4615,1941.0769,1935.7692,1942.6923,1935.7692,1940.3846,1935.0000,1941.1538,1936.4615,1936.4615,1943.4615,1937.3077,1940.3846,1938.0769,1942.6923,1938.8462,1936.5385,1943.4615,1935.6923,1942.6923,1938.8462,1941.9231,1936.5385,1938.8462,1938.0769,1936.5385,1945.7692,1937.3077,1939.5385,1932.6154,1941.1538,1935.7692,1938.0769,1947.3077,1939.6154,1941.9231,1935.0000,1940.3846,1934.1538,1941.1538,1938.8462,1936.5385,1939.6154,1933.4615,1937.3077,1935.6923,1938.8462,1933.4615,1935.7692,1939.6154,1934.9231,1941.0769,1934.9231,1944.2308,1938.8462,1934.2308,1941.1538,1935.7692,1941.1538,1935.6923,1941.9231,1938.8462,1943.4615" -benchmark task handling > without access thread,generating and deleting 
tasks,100,1,355369000,3402787.3900,3357031.6100,3439358.8600,208539.6030,175538.5217,236623.6161,"3504130.0000,3507876.0000,3523116.0000,3527944.0000,3517856.0000,3520881.0000,3514359.0000,3519859.0000,3515410.0000,3522074.0000,3189092.0000,2977251.0000,2982401.0000,2995074.0000,2986288.0000,2980025.0000,2979155.0000,2984654.0000,2987501.0000,2979224.0000,3293981.0000,3519910.0000,3510351.0000,3528745.0000,3513447.0000,3511623.0000,3501184.0000,3509169.0000,3511594.0000,3517114.0000,3505641.0000,3513397.0000,3516843.0000,3509390.0000,3496465.0000,3500773.0000,3510942.0000,3515090.0000,3499480.0000,3512134.0000,3502156.0000,3519458.0000,3270716.0000,2973052.0000,2980196.0000,2971549.0000,2968183.0000,2970659.0000,2968404.0000,2961050.0000,2966400.0000,2968945.0000,3102628.0000,3518877.0000,3513878.0000,3511564.0000,3506363.0000,3504050.0000,3505231.0000,3514379.0000,3515621.0000,3518997.0000,3512525.0000,3504240.0000,3509369.0000,3511744.0000,3512224.0000,3504891.0000,3507736.0000,3518938.0000,3510642.0000,3507736.0000,3498178.0000,3507085.0000,3497556.0000,3517154.0000,3504330.0000,3520811.0000,3512195.0000,3517434.0000,3498379.0000,3504770.0000,3501965.0000,3518707.0000,3513366.0000,3511082.0000,3505362.0000,3521062.0000,3501604.0000,3510541.0000,3503559.0000,3512565.0000,3516443.0000,3511514.0000,3506544.0000,3511893.0000,3496465.0000,3501975.0000,3506554.0000,3505802.0000" -benchmark task handling > with access thread,generating and deleting tasks with access 
thread,100,1,728559200,7187649.3000,7139748.9300,7222394.0000,205994.1214,162017.6006,249666.4405,"7271260.0000,7213851.0000,7296667.0000,7286128.0000,7236925.0000,6627720.0000,6639422.0000,6592884.0000,6645244.0000,7255420.0000,7278173.0000,7281049.0000,7337686.0000,7229351.0000,7299834.0000,7234932.0000,7376329.0000,7296147.0000,7296378.0000,7227988.0000,7259407.0000,7287281.0000,7280698.0000,7261821.0000,7285267.0000,7228770.0000,7262293.0000,7269106.0000,7239971.0000,7286770.0000,6638721.0000,6639953.0000,6633742.0000,6986952.0000,6813942.0000,7225053.0000,7245472.0000,7256251.0000,7225594.0000,7284315.0000,7321666.0000,7373353.0000,7223891.0000,7283002.0000,7222858.0000,7290236.0000,7297580.0000,7277021.0000,7207769.0000,7282512.0000,7226556.0000,7272902.0000,7274867.0000,7282742.0000,7248177.0000,7321214.0000,7213501.0000,7276560.0000,7249408.0000,7300997.0000,7286479.0000,7235902.0000,7248708.0000,7271861.0000,7239801.0000,7272071.0000,7272613.0000,7231625.0000,7259728.0000,7244599.0000,7211888.0000,7115445.0000,6648669.0000,6645023.0000,6811940.0000,7258586.0000,7280948.0000,6606360.0000,6798394.0000,7313771.0000,7296117.0000,7264738.0000,7290036.0000,7244338.0000,7314924.0000,7280308.0000,7287761.0000,7247245.0000,7303551.0000,7234350.0000,7309593.0000,7354338.0000,7302219.0000,7233548.0000,7280619.0000,7271561.0000,7247565.0000,7271119.0000,7263215.0000,7231795.0000" -generating large task graphs,soup 
topology,100,1,129726200,1230049.8900,1204667.8300,1254022.7900,125844.9082,117899.9634,130006.7131,"1329688.0000,1237693.0000,1073131.0000,1076478.0000,1068803.0000,1069715.0000,1069174.0000,1075055.0000,1069234.0000,1068422.0000,1069625.0000,1141270.0000,1069935.0000,1070396.0000,1070205.0000,1062781.0000,1066609.0000,1068543.0000,1075526.0000,1069394.0000,1070466.0000,1070436.0000,1286877.0000,1331200.0000,1332172.0000,1336300.0000,1329267.0000,1329377.0000,1334487.0000,1330790.0000,1330108.0000,1337222.0000,1330860.0000,1330930.0000,1334828.0000,1331401.0000,1329407.0000,1335869.0000,1333675.0000,1330419.0000,1337102.0000,1330699.0000,1329567.0000,1335067.0000,1333215.0000,1332172.0000,1333876.0000,1329086.0000,1330740.0000,1333405.0000,1330258.0000,1329106.0000,1333635.0000,1330239.0000,1330489.0000,1332142.0000,1329046.0000,1328786.0000,1334076.0000,1334747.0000,1332303.0000,1336210.0000,1332533.0000,1329176.0000,1334867.0000,1340969.0000,1087127.0000,1071197.0000,1067441.0000,1065917.0000,1067972.0000,1071308.0000,1067260.0000,1066920.0000,1066528.0000,1072000.0000,1063583.0000,1068232.0000,1072710.0000,1065417.0000,1067330.0000,1066619.0000,1070797.0000,1066539.0000,1161889.0000,1340278.0000,1331551.0000,1329988.0000,1334597.0000,1331101.0000,1332793.0000,1333806.0000,1332753.0000,1331662.0000,1334587.0000,1327613.0000,1328896.0000,1332793.0000,1327093.0000,1327413.0000" -generating large task graphs,chain 
topology,100,1,3863400,41601.6200,41529.4500,41778.9500,533.5479,152.1716,951.4063,"41345.0000,41586.0000,44983.0000,42127.0000,42006.0000,41586.0000,41626.0000,41636.0000,41777.0000,41676.0000,41516.0000,41525.0000,41295.0000,41676.0000,41426.0000,41415.0000,41396.0000,41516.0000,41395.0000,41306.0000,41666.0000,41465.0000,41506.0000,41306.0000,41495.0000,41385.0000,41315.0000,41416.0000,41476.0000,41455.0000,41305.0000,41816.0000,41676.0000,41556.0000,41606.0000,41606.0000,41656.0000,41546.0000,41736.0000,41565.0000,41526.0000,41656.0000,41446.0000,41476.0000,41526.0000,45383.0000,41646.0000,41395.0000,41566.0000,41747.0000,41636.0000,41505.0000,41797.0000,41676.0000,41646.0000,41396.0000,41565.0000,41687.0000,41545.0000,41595.0000,41526.0000,41506.0000,41396.0000,41696.0000,41576.0000,41506.0000,41616.0000,41445.0000,41446.0000,41556.0000,41536.0000,41535.0000,41515.0000,41727.0000,41656.0000,41566.0000,41515.0000,41556.0000,41386.0000,41395.0000,41496.0000,41606.0000,41356.0000,41345.0000,41486.0000,41486.0000,41335.0000,41626.0000,41426.0000,41516.0000,41365.0000,41706.0000,41336.0000,41235.0000,41416.0000,41395.0000,41286.0000,41285.0000,41486.0000,41536.0000" -generating large task graphs,expanding tree 
topology,100,1,5933700,62903.1800,62760.4500,63133.7000,905.7861,616.7802,1352.2012,"61974.0000,62937.0000,66553.0000,63698.0000,63578.0000,62355.0000,62416.0000,62916.0000,62295.0000,63408.0000,67825.0000,63427.0000,63247.0000,63147.0000,63046.0000,61985.0000,63017.0000,63397.0000,62796.0000,62686.0000,63227.0000,63498.0000,63227.0000,62806.0000,62525.0000,62455.0000,62746.0000,62115.0000,62616.0000,62055.0000,62375.0000,62115.0000,62155.0000,62405.0000,62306.0000,61704.0000,62355.0000,62105.0000,62065.0000,61914.0000,61885.0000,62415.0000,61855.0000,62475.0000,62556.0000,62305.0000,62335.0000,62385.0000,62485.0000,63206.0000,63277.0000,62345.0000,62445.0000,63297.0000,63166.0000,63007.0000,62836.0000,63277.0000,63127.0000,62997.0000,63327.0000,63417.0000,63287.0000,62877.0000,62996.0000,62897.0000,62986.0000,63227.0000,63217.0000,62836.0000,62205.0000,63327.0000,63167.0000,67254.0000,63698.0000,63127.0000,63077.0000,63046.0000,63197.0000,62836.0000,63367.0000,62796.0000,62596.0000,62876.0000,63267.0000,63217.0000,63127.0000,62956.0000,62916.0000,62195.0000,61944.0000,62866.0000,61984.0000,62245.0000,62335.0000,61995.0000,63237.0000,62756.0000,62976.0000,63518.0000" -generating large task graphs,contracting tree 
topology,100,1,9972900,99968.3200,99857.2700,100172.3100,745.7708,460.4163,1162.2683,"99575.0000,99335.0000,104365.0000,100137.0000,100397.0000,100327.0000,99797.0000,99596.0000,100207.0000,99445.0000,99817.0000,99736.0000,99305.0000,99636.0000,99666.0000,99525.0000,99977.0000,99926.0000,99115.0000,99054.0000,99726.0000,99696.0000,100057.0000,99495.0000,99616.0000,99727.0000,103092.0000,99726.0000,99897.0000,99916.0000,100578.0000,99366.0000,99295.0000,100047.0000,99155.0000,99987.0000,99906.0000,99987.0000,100066.0000,99906.0000,100067.0000,99726.0000,99386.0000,99906.0000,99546.0000,99295.0000,100147.0000,99546.0000,99876.0000,99896.0000,99586.0000,100237.0000,99726.0000,100027.0000,100397.0000,100187.0000,100298.0000,99886.0000,99636.0000,100007.0000,100087.0000,100066.0000,99535.0000,99937.0000,99526.0000,99866.0000,103794.0000,100207.0000,99847.0000,99425.0000,99967.0000,99466.0000,99656.0000,100277.0000,100267.0000,100137.0000,99907.0000,99826.0000,99896.0000,100237.0000,100477.0000,100708.0000,99506.0000,100017.0000,99836.0000,99937.0000,99455.0000,100377.0000,99956.0000,99816.0000,99926.0000,100227.0000,99716.0000,99716.0000,99526.0000,100077.0000,99816.0000,99936.0000,100357.0000,99967.0000" -generating large task graphs,wave_sim 
topology,100,1,39467400,395342.4400,394914.6100,396607.4300,3425.4306,1417.2847,7454.1559,"394124.0000,394565.0000,425684.0000,403252.0000,396448.0000,396128.0000,395076.0000,394986.0000,395387.0000,394535.0000,394935.0000,394464.0000,398793.0000,395607.0000,395718.0000,394645.0000,394144.0000,393313.0000,394294.0000,394185.0000,393372.0000,394174.0000,397811.0000,393403.0000,392942.0000,393613.0000,393202.0000,394264.0000,393433.0000,393954.0000,394485.0000,394555.0000,400266.0000,394745.0000,394315.0000,394825.0000,394465.0000,394164.0000,394225.0000,395887.0000,395287.0000,394975.0000,398032.0000,394385.0000,395186.0000,394445.0000,394835.0000,394585.0000,394435.0000,394986.0000,394735.0000,395226.0000,399003.0000,395006.0000,394345.0000,394705.0000,394094.0000,394795.0000,395076.0000,395577.0000,394775.0000,394905.0000,399254.0000,394405.0000,395246.0000,395146.0000,393894.0000,394816.0000,395146.0000,394134.0000,394244.0000,393834.0000,393794.0000,398162.0000,395176.0000,394525.0000,394384.0000,393824.0000,394946.0000,394936.0000,394675.0000,395286.0000,395527.0000,398933.0000,394955.0000,395186.0000,394404.0000,395026.0000,394976.0000,393854.0000,393763.0000,394175.0000,393953.0000,397120.0000,394105.0000,394234.0000,394635.0000,394395.0000,394565.0000,394835.0000" -generating large task graphs,jacobi 
topology,100,1,13248300,118149.0100,117087.3400,119640.6200,6373.3505,4895.7558,7815.5142,"135975.0000,135273.0000,119514.0000,115676.0000,115847.0000,115286.0000,115556.0000,115566.0000,115706.0000,115146.0000,115486.0000,115416.0000,115245.0000,115486.0000,115386.0000,115767.0000,115496.0000,115406.0000,115626.0000,115636.0000,115777.0000,120636.0000,115385.0000,115445.0000,115917.0000,115436.0000,115226.0000,116047.0000,115726.0000,115657.0000,115125.0000,115215.0000,115957.0000,115797.0000,116117.0000,116208.0000,115986.0000,116067.0000,115856.0000,116248.0000,116327.0000,115957.0000,115897.0000,115927.0000,115426.0000,115666.0000,115827.0000,115586.0000,115386.0000,115336.0000,115496.0000,115255.0000,115446.0000,115617.0000,115716.0000,115696.0000,122509.0000,115666.0000,115857.0000,115696.0000,115637.0000,115265.0000,115476.0000,115967.0000,115667.0000,115165.0000,115746.0000,115676.0000,115496.0000,115857.0000,115706.0000,115747.0000,115325.0000,115807.0000,116057.0000,115897.0000,115907.0000,115726.0000,115576.0000,115847.0000,115636.0000,115536.0000,115916.0000,115707.0000,115376.0000,115496.0000,116077.0000,115907.0000,115856.0000,115817.0000,126827.0000,136175.0000,135814.0000,135845.0000,135654.0000,136015.0000,135434.0000,135584.0000,136075.0000,135995.0000" -generating large command graphs for N nodes - 1,soup 
topology,100,1,203142900,1935466.2300,1895982.9300,1971037.6300,189986.4989,173159.8855,200104.1723,"1660154.0000,1669381.0000,2097583.0000,2071825.0000,2068137.0000,2112100.0000,2065472.0000,2070612.0000,2067036.0000,2072726.0000,2064851.0000,2068638.0000,2067466.0000,2072064.0000,2064841.0000,2069149.0000,2068538.0000,2074770.0000,2065833.0000,2067326.0000,2061605.0000,2069760.0000,2065873.0000,2066915.0000,2063058.0000,2073247.0000,2062586.0000,2066073.0000,2062757.0000,2070031.0000,2066825.0000,2068508.0000,2069871.0000,2065893.0000,2071574.0000,2056765.0000,2071183.0000,2072325.0000,2074990.0000,2063349.0000,2076573.0000,2067036.0000,2068769.0000,2059791.0000,2071884.0000,2067847.0000,2071674.0000,2065442.0000,2074580.0000,2067166.0000,2069750.0000,2063629.0000,2071574.0000,2066855.0000,2072486.0000,1719196.0000,1668269.0000,1658130.0000,1664262.0000,1659012.0000,1659733.0000,1664101.0000,1658431.0000,1666516.0000,1661417.0000,1659944.0000,1664842.0000,2060212.0000,2074359.0000,2068749.0000,2072996.0000,2063498.0000,2070752.0000,2072736.0000,2006611.0000,1663411.0000,1667388.0000,1660775.0000,1667498.0000,1660775.0000,1664282.0000,1661056.0000,2061635.0000,2070562.0000,2067126.0000,2077415.0000,2070352.0000,2071664.0000,1663771.0000,1671376.0000,1659242.0000,1666316.0000,1661888.0000,1667357.0000,1662648.0000,1660304.0000,1667358.0000,1662919.0000,1665824.0000,1663169.0000" -generating large command graphs for N nodes - 1,chain 
topology,100,1,13241600,136341.0200,136151.7400,136711.2800,1304.6917,721.8971,2044.0133,"137127.0000,136376.0000,144281.0000,136946.0000,136465.0000,136025.0000,136126.0000,136135.0000,135724.0000,135795.0000,135674.0000,135825.0000,136345.0000,136256.0000,136145.0000,136105.0000,135935.0000,136255.0000,135694.0000,135935.0000,136596.0000,135644.0000,142257.0000,136907.0000,135834.0000,136106.0000,136175.0000,135644.0000,135995.0000,135955.0000,135524.0000,135845.0000,136205.0000,135664.0000,135874.0000,135234.0000,135333.0000,135684.0000,135604.0000,135464.0000,136396.0000,135794.0000,135674.0000,136335.0000,135795.0000,135855.0000,135844.0000,136146.0000,135944.0000,136065.0000,136275.0000,140674.0000,136506.0000,136165.0000,136336.0000,136235.0000,136176.0000,136035.0000,135834.0000,135795.0000,135875.0000,136445.0000,136155.0000,136365.0000,136195.0000,136566.0000,136436.0000,135774.0000,136035.0000,136115.0000,135974.0000,136116.0000,136295.0000,136286.0000,135504.0000,135824.0000,135905.0000,136105.0000,136466.0000,136035.0000,136236.0000,142297.0000,137037.0000,136515.0000,136195.0000,136255.0000,135975.0000,135945.0000,135835.0000,136105.0000,135785.0000,135954.0000,136756.0000,136556.0000,136225.0000,136196.0000,136205.0000,136586.0000,136376.0000,136005.0000" -generating large command graphs for N nodes - 1,expanding tree 
topology,100,1,18213300,175669.0400,173227.5000,177984.2200,12153.6916,11665.8174,12537.8484,"185929.0000,185659.0000,173146.0000,163627.0000,162706.0000,161924.0000,161884.0000,162144.0000,162385.0000,161414.0000,161773.0000,161594.0000,161834.0000,162786.0000,161904.0000,162124.0000,160892.0000,161524.0000,161022.0000,163186.0000,161954.0000,169889.0000,162245.0000,162605.0000,160582.0000,161573.0000,161483.0000,161514.0000,161453.0000,161633.0000,161022.0000,160080.0000,160902.0000,160992.0000,161112.0000,161112.0000,160572.0000,160681.0000,161003.0000,160140.0000,161322.0000,161473.0000,160661.0000,160952.0000,160692.0000,167705.0000,190247.0000,187111.0000,187052.0000,187422.0000,185639.0000,185318.0000,185008.0000,185929.0000,186320.0000,187262.0000,185900.0000,185999.0000,186140.0000,186000.0000,185509.0000,185619.0000,185028.0000,185499.0000,185268.0000,186300.0000,185258.0000,191871.0000,186280.0000,186581.0000,186591.0000,186070.0000,186591.0000,186250.0000,186230.0000,185719.0000,185348.0000,185699.0000,186882.0000,186180.0000,185819.0000,185499.0000,186040.0000,186320.0000,186231.0000,185739.0000,185859.0000,186371.0000,191250.0000,187602.0000,186139.0000,185649.0000,185599.0000,186270.0000,186481.0000,186110.0000,185368.0000,185810.0000,186200.0000,185619.0000" -generating large command graphs for N nodes - 1,contracting tree 
topology,100,1,21856200,229446.5900,227662.2000,230387.1100,6413.8668,3848.4650,9979.4883,"194205.0000,195558.0000,240633.0000,231275.0000,231706.0000,231376.0000,236164.0000,229642.0000,228410.0000,229583.0000,229532.0000,228871.0000,228651.0000,228730.0000,230023.0000,229562.0000,228631.0000,229121.0000,229863.0000,228701.0000,229632.0000,229903.0000,229953.0000,228921.0000,235383.0000,230094.0000,229101.0000,230364.0000,229442.0000,230174.0000,229191.0000,228820.0000,229802.0000,230374.0000,228821.0000,229883.0000,229853.0000,229102.0000,229312.0000,229532.0000,230514.0000,234271.0000,230263.0000,229432.0000,228571.0000,229712.0000,230644.0000,229873.0000,230033.0000,230364.0000,229101.0000,229442.0000,231105.0000,229813.0000,230264.0000,230123.0000,229462.0000,230524.0000,234481.0000,230053.0000,230093.0000,230073.0000,229643.0000,230514.0000,229482.0000,229282.0000,229572.0000,230274.0000,230133.0000,229552.0000,229702.0000,229943.0000,230013.0000,230714.0000,229542.0000,231446.0000,235904.0000,231386.0000,231035.0000,231396.0000,230434.0000,231566.0000,230194.0000,230153.0000,230293.0000,230234.0000,229813.0000,230053.0000,229432.0000,230655.0000,229672.0000,231486.0000,229793.0000,234863.0000,229372.0000,229792.0000,230233.0000,230174.0000,244560.0000,196249.0000" -generating large command graphs for N nodes - 1,wave_sim 
topology,100,1,99869300,1073548.2000,1056258.6800,1088925.6700,83059.9418,74992.7505,88233.1275,"1131872.0000,1126712.0000,1158233.0000,1145188.0000,1132654.0000,1129619.0000,1127153.0000,1131692.0000,1129829.0000,1128206.0000,1130901.0000,1129458.0000,1131832.0000,1127144.0000,1127604.0000,1124439.0000,1125170.0000,1128346.0000,1132975.0000,1126462.0000,1125931.0000,1133015.0000,1130210.0000,1126151.0000,1125901.0000,1131442.0000,1144647.0000,953835.0000,945108.0000,951731.0000,944186.0000,947122.0000,947553.0000,953314.0000,948865.0000,949687.0000,951700.0000,954977.0000,948675.0000,950428.0000,946982.0000,951871.0000,948014.0000,946161.0000,946942.0000,952533.0000,950569.0000,950279.0000,951010.0000,948214.0000,953775.0000,949186.0000,950910.0000,950759.0000,961280.0000,952673.0000,950438.0000,953364.0000,1050358.0000,1129308.0000,1130560.0000,1130470.0000,1135820.0000,1128225.0000,1127484.0000,1135629.0000,1130550.0000,1131451.0000,1129227.0000,1135500.0000,1128657.0000,1128556.0000,1136973.0000,1127224.0000,1123196.0000,1128446.0000,1133406.0000,1129127.0000,1126602.0000,1133186.0000,1129398.0000,1131502.0000,1130179.0000,1131893.0000,1124900.0000,1127815.0000,1128196.0000,1124889.0000,1124418.0000,1129829.0000,1128416.0000,1126443.0000,1129177.0000,1130320.0000,1126974.0000,1127224.0000,1126993.0000,1127956.0000,1128526.0000,1124890.0000" -generating large command graphs for N nodes - 1,jacobi 
topology,100,1,38372600,395045.8400,394713.3700,395626.9900,2176.7657,1464.6429,3861.1241,"396038.0000,394936.0000,409964.0000,396258.0000,395737.0000,395567.0000,395597.0000,396419.0000,395026.0000,394665.0000,394565.0000,398953.0000,395256.0000,394145.0000,394404.0000,393823.0000,393503.0000,395257.0000,394855.0000,395647.0000,394164.0000,398172.0000,394785.0000,394184.0000,394304.0000,394805.0000,394024.0000,394505.0000,393873.0000,393553.0000,391950.0000,392731.0000,398773.0000,393513.0000,393984.0000,394325.0000,396218.0000,394956.0000,395286.0000,395096.0000,393563.0000,395046.0000,398042.0000,395096.0000,394225.0000,394725.0000,395667.0000,395697.0000,394705.0000,394044.0000,393903.0000,393864.0000,401508.0000,395587.0000,393112.0000,394275.0000,392731.0000,393863.0000,394324.0000,394715.0000,393673.0000,394435.0000,397521.0000,394715.0000,393854.0000,394265.0000,392641.0000,393543.0000,394965.0000,393723.0000,394736.0000,394705.0000,399104.0000,394695.0000,393493.0000,393533.0000,393493.0000,394605.0000,393403.0000,393453.0000,394014.0000,395076.0000,399704.0000,396148.0000,395366.0000,395547.0000,394805.0000,395086.0000,393473.0000,393833.0000,394394.0000,395948.0000,399274.0000,395527.0000,395126.0000,394195.0000,393713.0000,394475.0000,393503.0000,394314.0000" -generating large command graphs for N nodes - 4,soup 
topology,100,1,234656800,2266553.7400,2223482.3600,2310008.1300,221082.0384,215551.1539,225473.9742,"2495998.0000,2497771.0000,2514493.0000,2489827.0000,2496439.0000,2491719.0000,2490538.0000,2497120.0000,2494114.0000,2493884.0000,2498243.0000,2485789.0000,2494265.0000,2490328.0000,2490076.0000,2494956.0000,2484216.0000,2498654.0000,2497020.0000,2487121.0000,2493073.0000,2495587.0000,2493925.0000,2497040.0000,2487973.0000,2494956.0000,2509434.0000,2056896.0000,2048831.0000,2040636.0000,2045204.0000,2034113.0000,2036828.0000,2030266.0000,2047959.0000,2040876.0000,2047659.0000,2039744.0000,2046606.0000,2037329.0000,2043591.0000,2046206.0000,2337979.0000,2495548.0000,2487612.0000,2490257.0000,2495207.0000,2500456.0000,2495978.0000,2491720.0000,2494826.0000,2487702.0000,2224344.0000,2043370.0000,2034503.0000,2048270.0000,2038982.0000,2045875.0000,2035566.0000,2042839.0000,2039713.0000,2045675.0000,2477824.0000,2083797.0000,2038220.0000,2047167.0000,2040054.0000,2112622.0000,2039653.0000,2049993.0000,2036457.0000,2044733.0000,2038461.0000,2419513.0000,2495127.0000,2172035.0000,2043701.0000,2032169.0000,2042428.0000,2038651.0000,2042219.0000,2031107.0000,2034343.0000,2028191.0000,2032921.0000,2027501.0000,2032029.0000,2023833.0000,2033631.0000,2026759.0000,2296309.0000,2491319.0000,2492652.0000,2497992.0000,2487873.0000,2494646.0000,2493443.0000,2484356.0000,2492081.0000,2485839.0000" -generating large command graphs for N nodes - 4,chain 
topology,100,1,35455300,369353.6700,368954.2300,369889.5100,2336.5288,1835.5999,3101.5771,"375249.0000,370099.0000,380078.0000,377252.0000,369758.0000,370409.0000,369087.0000,369487.0000,368125.0000,369297.0000,368646.0000,368054.0000,367824.0000,369047.0000,374517.0000,368245.0000,369368.0000,367754.0000,367353.0000,369357.0000,369297.0000,368426.0000,368536.0000,369076.0000,367854.0000,374788.0000,368886.0000,368255.0000,368766.0000,367634.0000,367283.0000,369217.0000,367042.0000,367775.0000,368195.0000,374287.0000,369006.0000,368856.0000,368906.0000,368185.0000,370268.0000,369387.0000,367965.0000,367844.0000,367464.0000,367374.0000,373385.0000,368505.0000,368485.0000,367845.0000,368706.0000,367965.0000,369146.0000,367964.0000,368426.0000,369167.0000,368375.0000,374877.0000,368496.0000,367193.0000,368125.0000,369698.0000,368786.0000,370489.0000,370209.0000,368897.0000,369377.0000,369568.0000,374657.0000,369949.0000,369938.0000,368816.0000,369878.0000,369437.0000,368887.0000,367944.0000,369778.0000,364709.0000,367463.0000,372212.0000,367444.0000,366562.0000,367394.0000,365870.0000,365820.0000,369808.0000,369026.0000,369698.0000,369578.0000,368245.0000,374317.0000,369427.0000,368455.0000,369929.0000,370008.0000,370229.0000,369808.0000,368686.0000,369227.0000,368906.0000" -generating large command graphs for N nodes - 4,expanding tree 
topology,100,1,46100700,425830.4700,420581.4500,431505.9700,27770.1240,25903.7995,29220.4516,"402811.0000,403983.0000,472984.0000,463035.0000,461332.0000,461963.0000,460390.0000,461872.0000,460790.0000,466792.0000,461782.0000,460129.0000,459848.0000,459688.0000,459458.0000,460380.0000,461973.0000,461833.0000,464918.0000,461401.0000,461152.0000,460380.0000,460159.0000,458577.0000,458055.0000,460279.0000,459589.0000,464137.0000,460931.0000,459608.0000,458977.0000,458756.0000,460070.0000,460009.0000,458787.0000,463436.0000,461421.0000,461131.0000,481590.0000,407509.0000,406557.0000,405436.0000,404344.0000,405385.0000,403973.0000,410796.0000,404624.0000,402429.0000,406126.0000,404153.0000,405165.0000,403632.0000,404664.0000,403462.0000,405816.0000,410125.0000,402881.0000,401999.0000,403302.0000,402640.0000,404013.0000,405035.0000,403952.0000,404413.0000,412550.0000,405055.0000,400195.0000,400225.0000,405635.0000,404213.0000,402751.0000,403502.0000,403642.0000,404183.0000,410976.0000,404083.0000,404554.0000,401909.0000,404514.0000,403732.0000,403282.0000,402380.0000,404403.0000,403942.0000,410304.0000,405235.0000,404193.0000,404283.0000,403883.0000,403502.0000,403662.0000,407098.0000,404063.0000,403642.0000,410355.0000,403061.0000,404955.0000,404183.0000,404383.0000,403682.0000" -generating large command graphs for N nodes - 4,contracting tree 
topology,100,1,47807400,460596.5000,454349.2700,467372.6000,33147.7209,30688.6273,34610.2575,"504423.0000,503612.0000,513841.0000,509243.0000,506257.0000,502559.0000,505275.0000,505755.0000,504092.0000,502639.0000,504142.0000,469186.0000,435182.0000,434261.0000,434691.0000,434962.0000,434781.0000,435733.0000,434991.0000,436174.0000,441815.0000,434060.0000,433659.0000,434120.0000,434871.0000,433840.0000,433759.0000,433920.0000,434300.0000,441324.0000,433749.0000,434480.0000,434110.0000,434039.0000,432747.0000,433218.0000,434581.0000,434611.0000,442746.0000,433449.0000,434050.0000,434029.0000,434520.0000,435513.0000,434631.0000,433960.0000,435132.0000,441414.0000,434470.0000,435612.0000,435121.0000,435853.0000,435463.0000,435402.0000,435222.0000,435613.0000,434140.0000,442746.0000,435433.0000,436043.0000,433168.0000,434200.0000,434972.0000,435853.0000,434671.0000,435533.0000,443468.0000,434701.0000,435663.0000,435523.0000,434130.0000,436114.0000,435964.0000,436334.0000,434792.0000,483012.0000,504413.0000,504533.0000,504333.0000,505555.0000,505685.0000,502370.0000,503171.0000,508601.0000,503622.0000,503281.0000,505105.0000,505415.0000,503682.0000,503231.0000,505375.0000,511967.0000,503291.0000,503511.0000,503541.0000,502670.0000,504133.0000,503692.0000,503171.0000,507640.0000" -generating large command graphs for N nodes - 4,wave_sim 
topology,100,1,224387600,2213953.1800,2182110.0900,2242884.6700,154968.0033,141776.1327,163756.4841,"2331847.0000,2314635.0000,2358388.0000,2342928.0000,2324353.0000,2318121.0000,2322639.0000,2320605.0000,2322108.0000,2321788.0000,2319284.0000,2325124.0000,2315146.0000,2323552.0000,2323571.0000,2311288.0000,2319123.0000,2320706.0000,2321488.0000,2317239.0000,2319734.0000,2314224.0000,2323502.0000,2326427.0000,2129343.0000,2004748.0000,1997694.0000,1993145.0000,1988938.0000,1992744.0000,1987865.0000,1994117.0000,1989408.0000,1999306.0000,1994818.0000,1999848.0000,1993376.0000,1995921.0000,1996241.0000,2087775.0000,2329513.0000,2332899.0000,2323562.0000,2330695.0000,2326717.0000,2324914.0000,2328661.0000,2322760.0000,2332559.0000,2329323.0000,2331376.0000,2335194.0000,2330345.0000,2329032.0000,2323882.0000,2321477.0000,2380769.0000,2324213.0000,2324935.0000,2084548.0000,1986643.0000,2032710.0000,2173768.0000,2322700.0000,2326988.0000,2318903.0000,2315226.0000,2321838.0000,2313923.0000,2323021.0000,2329162.0000,2308814.0000,2004657.0000,1989278.0000,1988577.0000,1991492.0000,1992975.0000,1984409.0000,1988487.0000,1974770.0000,1978668.0000,1971555.0000,1974550.0000,1976664.0000,1984488.0000,1997364.0000,1989628.0000,2319655.0000,2319854.0000,2328391.0000,2330826.0000,2326768.0000,2331657.0000,2333069.0000,2326587.0000,2330925.0000,2325114.0000,2331526.0000,2326027.0000,2333180.0000" -generating large command graphs for N nodes - 4,jacobi 
topology,100,1,88188600,880479.5500,872085.2300,885832.1600,33768.2592,22985.5940,45080.8869,"887389.0000,885335.0000,768554.0000,765308.0000,757443.0000,755930.0000,757222.0000,757663.0000,757332.0000,880536.0000,886497.0000,888021.0000,887018.0000,895284.0000,892108.0000,891457.0000,890245.0000,889243.0000,896958.0000,887810.0000,888141.0000,886708.0000,896075.0000,889964.0000,891908.0000,889603.0000,886728.0000,890355.0000,886958.0000,887099.0000,889964.0000,896487.0000,889283.0000,890395.0000,889443.0000,887520.0000,949176.0000,890084.0000,890565.0000,890184.0000,892900.0000,890996.0000,889804.0000,889764.0000,895825.0000,891658.0000,887860.0000,888892.0000,887510.0000,890625.0000,885655.0000,886557.0000,886187.0000,890094.0000,886247.0000,888391.0000,884634.0000,884844.0000,889904.0000,887600.0000,887189.0000,886417.0000,891837.0000,888953.0000,887529.0000,887670.0000,884344.0000,891326.0000,885515.0000,886317.0000,885376.0000,892639.0000,884704.0000,885937.0000,885896.0000,885666.0000,890424.0000,882409.0000,887218.0000,888200.0000,891346.0000,887890.0000,887680.0000,891126.0000,890676.0000,896376.0000,889023.0000,889824.0000,887910.0000,893491.0000,888812.0000,888562.0000,889002.0000,886327.0000,892158.0000,885606.0000,885305.0000,885956.0000,894954.0000,890425.0000" -generating large command graphs for N nodes - 16,soup 
topology,100,1,314075200,3035953.0000,2983949.6400,3080261.6300,245087.7158,214847.1014,267666.3728,"3182039.0000,3180045.0000,2629131.0000,2615645.0000,2622498.0000,2609243.0000,2620905.0000,2622638.0000,2614063.0000,3138186.0000,3180746.0000,3191818.0000,3182199.0000,3178251.0000,3182068.0000,3179183.0000,3235951.0000,3183211.0000,3178071.0000,3178993.0000,3179183.0000,3184333.0000,3183811.0000,3179814.0000,3180215.0000,3185295.0000,3180656.0000,3182680.0000,3179544.0000,3183081.0000,3183862.0000,3180315.0000,3182049.0000,3175908.0000,3185065.0000,3186367.0000,3180857.0000,3184013.0000,3179444.0000,3180726.0000,3185655.0000,2683994.0000,2615825.0000,2618731.0000,2622207.0000,2614222.0000,2616407.0000,2617639.0000,2796118.0000,3177179.0000,3185035.0000,3183051.0000,3182169.0000,3185095.0000,3184594.0000,3178071.0000,3186797.0000,2975768.0000,2621997.0000,2619853.0000,2614983.0000,2619122.0000,2618521.0000,2621296.0000,2623721.0000,2608722.0000,2593073.0000,2607289.0000,2593383.0000,3162792.0000,3189733.0000,3177049.0000,3181878.0000,3181107.0000,3185445.0000,3185486.0000,3190766.0000,3184543.0000,3177660.0000,3181187.0000,3182941.0000,3196737.0000,3188331.0000,3171819.0000,3190164.0000,3178162.0000,3179634.0000,3183191.0000,3180936.0000,3192979.0000,3190955.0000,3195023.0000,3192038.0000,3182550.0000,3181779.0000,3186557.0000,3187479.0000,3183270.0000,3176147.0000,3184343.0000" -generating large command graphs for N nodes - 16,chain 
topology,100,1,120184700,1216977.5600,1201396.1300,1230711.1000,74800.4423,66367.9731,80804.3112,"1261759.0000,1258403.0000,1265526.0000,1265276.0000,1274443.0000,1096556.0000,1097648.0000,1107777.0000,1095383.0000,1094852.0000,1093861.0000,1092959.0000,1087629.0000,1089512.0000,1098901.0000,1093530.0000,1090434.0000,1094101.0000,1099231.0000,1094381.0000,1091596.0000,1093570.0000,1098108.0000,1098699.0000,1100954.0000,1107205.0000,1101344.0000,1097738.0000,1099511.0000,1110071.0000,1095714.0000,1096365.0000,1112386.0000,1265466.0000,1264274.0000,1261719.0000,1268351.0000,1264534.0000,1259164.0000,1313026.0000,1262130.0000,1261068.0000,1264915.0000,1258262.0000,1259354.0000,1266257.0000,1262129.0000,1259415.0000,1268211.0000,1259946.0000,1263923.0000,1267329.0000,1260576.0000,1258903.0000,1262290.0000,1263562.0000,1259564.0000,1260736.0000,1260747.0000,1260827.0000,1258192.0000,1269213.0000,1257590.0000,1257551.0000,1267389.0000,1262430.0000,1259324.0000,1269563.0000,1258523.0000,1255237.0000,1261568.0000,1266387.0000,1263993.0000,1261788.0000,1267129.0000,1260196.0000,1263833.0000,1264093.0000,1262641.0000,1259064.0000,1267309.0000,1258532.0000,1265045.0000,1268812.0000,1260517.0000,1258423.0000,1265295.0000,1260988.0000,1258673.0000,1264544.0000,1262359.0000,1262460.0000,1263341.0000,1266127.0000,1260586.0000,1260577.0000,1266087.0000,1259955.0000,1262721.0000,1267600.0000" -generating large command graphs for N nodes - 16,expanding tree 
topology,100,1,109964700,1103570.4200,1087523.5900,1118194.0000,78286.6498,71238.0823,83488.2575,"1150568.0000,1153614.0000,1190634.0000,1153534.0000,1153283.0000,1155988.0000,1154215.0000,1150648.0000,1158263.0000,1150598.0000,1150418.0000,1154466.0000,1158413.0000,1152822.0000,1152412.0000,1158533.0000,1153543.0000,1149546.0000,1157781.0000,1155707.0000,1153293.0000,1156509.0000,1161769.0000,1155658.0000,1150187.0000,1158263.0000,1153123.0000,1156289.0000,1152863.0000,1160617.0000,1153604.0000,1152051.0000,1160336.0000,990465.0000,988170.0000,985885.0000,996355.0000,989011.0000,988951.0000,985796.0000,996516.0000,990033.0000,987458.0000,985154.0000,991697.0000,988792.0000,986968.0000,985415.0000,994272.0000,987770.0000,991807.0000,988992.0000,990565.0000,997728.0000,989603.0000,990935.0000,989403.0000,997899.0000,989462.0000,990515.0000,989633.0000,998299.0000,987880.0000,990605.0000,989814.0000,1140799.0000,1153503.0000,1151961.0000,1162621.0000,1155077.0000,1154015.0000,1157541.0000,1161379.0000,1153664.0000,1149757.0000,1161388.0000,1157091.0000,1154866.0000,1163152.0000,1155297.0000,1154175.0000,1153814.0000,1163603.0000,1154746.0000,1154064.0000,1160446.0000,1157652.0000,1155507.0000,1153483.0000,1156109.0000,1150919.0000,1154556.0000,1240889.0000,1149646.0000,1156809.0000,1155567.0000,1160758.0000,1155798.0000,1149606.0000,1155388.0000" -generating large command graphs for N nodes - 16,contracting tree 
topology,100,1,122687200,1144743.7700,1126790.8600,1163276.6100,92931.5218,90647.1199,97137.6725,"1239987.0000,1244176.0000,1078762.0000,1064074.0000,1058724.0000,1056670.0000,1056109.0000,1064725.0000,1057211.0000,1058523.0000,1059675.0000,1058264.0000,1057752.0000,1057662.0000,1066409.0000,1058273.0000,1054366.0000,1057762.0000,1062932.0000,1058723.0000,1058603.0000,1059886.0000,1065948.0000,1055898.0000,1056359.0000,1054316.0000,1063603.0000,1054596.0000,1055949.0000,1083982.0000,1245949.0000,1245809.0000,1240528.0000,1247772.0000,1243584.0000,1242803.0000,1243504.0000,1242062.0000,1243063.0000,1252141.0000,1247242.0000,1244105.0000,1253333.0000,1243805.0000,1241501.0000,1247612.0000,1244687.0000,1245618.0000,1240048.0000,1247402.0000,1248183.0000,1243033.0000,1250578.0000,1240438.0000,1244887.0000,1252522.0000,1242122.0000,1244346.0000,1245748.0000,1241730.0000,1240919.0000,1316352.0000,1246540.0000,1246530.0000,1238204.0000,1240920.0000,1254826.0000,1073031.0000,1059596.0000,1054957.0000,1055818.0000,1055958.0000,1062120.0000,1055718.0000,1053143.0000,1053083.0000,1063643.0000,1057392.0000,1055247.0000,1063242.0000,1055077.0000,1054005.0000,1056890.0000,1062121.0000,1051871.0000,1057852.0000,1058363.0000,1066900.0000,1054315.0000,1051941.0000,1055077.0000,1081297.0000,1055117.0000,1175776.0000,1247572.0000,1240829.0000,1239587.0000,1237332.0000,1244606.0000,1244536.0000" -generating large command graphs for N nodes - 16,wave_sim 
topology,100,1,452026600,4304168.2300,4236095.1900,4364634.4500,326962.1314,294723.7348,351477.6385,"3965203.0000,4537498.0000,3807915.0000,3797615.0000,4118494.0000,4558708.0000,4538049.0000,4516819.0000,4627529.0000,4525436.0000,4531627.0000,4538159.0000,4443109.0000,3774832.0000,3781294.0000,4158660.0000,4562195.0000,4559160.0000,4546545.0000,4544922.0000,4540935.0000,4547006.0000,4556004.0000,4530625.0000,4531347.0000,4536546.0000,4366724.0000,3773880.0000,3793638.0000,3881304.0000,4375962.0000,4554470.0000,4544391.0000,4537629.0000,4562506.0000,4536486.0000,4541877.0000,4526708.0000,4543069.0000,4550041.0000,4412882.0000,3781284.0000,3790161.0000,4201402.0000,4567214.0000,4536706.0000,4572344.0000,4545634.0000,4534653.0000,4538691.0000,4549471.0000,4533410.0000,3886464.0000,3772888.0000,3830478.0000,3735788.0000,3756789.0000,3772067.0000,3755166.0000,4398194.0000,4532398.0000,4518893.0000,4540514.0000,4521007.0000,4537678.0000,4534663.0000,4548620.0000,4543078.0000,4531838.0000,4063450.0000,3882587.0000,3780693.0000,3829937.0000,4531376.0000,4539471.0000,4528712.0000,4448850.0000,3768180.0000,3778490.0000,4099168.0000,4536426.0000,4532969.0000,4554200.0000,4478687.0000,3782016.0000,3770584.0000,3845106.0000,3757370.0000,4404255.0000,4529613.0000,4533090.0000,4513773.0000,4536897.0000,4535084.0000,4535584.0000,4532759.0000,4528070.0000,4555332.0000,3874742.0000,3784060.0000" -generating large command graphs for N nodes - 16,jacobi 
topology,100,1,242693900,2556989.4200,2494589.1600,2660277.8000,400626.5559,273774.1288,610914.3733,"2554810.0000,2600797.0000,2571141.0000,2563646.0000,2556453.0000,2552335.0000,2407440.0000,2155614.0000,2156004.0000,2155182.0000,2159450.0000,2219154.0000,2155433.0000,2159321.0000,2155603.0000,2160693.0000,2161153.0000,2241676.0000,2155363.0000,2160933.0000,2161524.0000,2366523.0000,2572614.0000,2560281.0000,2566081.0000,2570600.0000,2565240.0000,2567945.0000,2555932.0000,2549148.0000,2559579.0000,2566122.0000,2558767.0000,2564468.0000,2554079.0000,2574778.0000,2575329.0000,2553738.0000,2561973.0000,2559679.0000,2556262.0000,2378766.0000,2146996.0000,2154180.0000,2144132.0000,2264399.0000,2169068.0000,2152657.0000,2277144.0000,2552887.0000,2566973.0000,2557695.0000,2554238.0000,2564007.0000,2564768.0000,2555391.0000,2561673.0000,2560330.0000,2569958.0000,2564328.0000,2548878.0000,2615756.0000,2555892.0000,2560942.0000,2576942.0000,2572072.0000,2364239.0000,3708096.0000,4907639.0000,4152749.0000,3802074.0000,3794910.0000,3118208.0000,2568996.0000,2549098.0000,2616968.0000,2571602.0000,2565480.0000,2567924.0000,2606007.0000,2548889.0000,2556363.0000,2548758.0000,2568796.0000,2593824.0000,2554559.0000,2562004.0000,2616958.0000,2561703.0000,2550452.0000,2554008.0000,2544280.0000,2606438.0000,2557715.0000,2551133.0000,2554840.0000,2549810.0000,2605395.0000,2551944.0000,2554148.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,soup 
topology,100,1,211008400,1968071.0600,1925637.7200,2009069.4500,211892.5811,194866.2011,252415.9073,"1678368.0000,1685663.0000,2109145.0000,2099487.0000,2165593.0000,2097002.0000,2100509.0000,2089859.0000,2102292.0000,2152768.0000,2096612.0000,2101611.0000,2096030.0000,2097232.0000,2093706.0000,2150273.0000,2101420.0000,2099146.0000,2100749.0000,2103384.0000,2101020.0000,2164200.0000,2095549.0000,2104506.0000,2095950.0000,2102513.0000,2092524.0000,2155843.0000,2100909.0000,2091772.0000,2101260.0000,2097172.0000,2148971.0000,2095499.0000,2101150.0000,2097262.0000,2101140.0000,2091532.0000,2137609.0000,2095349.0000,2103785.0000,2096210.0000,2101590.0000,2091942.0000,2669448.0000,1683358.0000,1676134.0000,1676875.0000,1679962.0000,1679431.0000,1693557.0000,1678889.0000,1682416.0000,1681895.0000,1676976.0000,1688438.0000,1679350.0000,1698606.0000,1680793.0000,1678348.0000,1683378.0000,1677377.0000,1862177.0000,2095199.0000,2098265.0000,2095409.0000,2098756.0000,2095419.0000,2162657.0000,2098034.0000,2098565.0000,2096751.0000,2147628.0000,2105528.0000,2092463.0000,2096812.0000,2091652.0000,2138902.0000,2095449.0000,2103525.0000,2101431.0000,2160632.0000,2098966.0000,2103654.0000,2097703.0000,1869872.0000,1680994.0000,1688347.0000,1678799.0000,1675402.0000,1681715.0000,1678720.0000,1690972.0000,1681815.0000,1687245.0000,1676646.0000,1677427.0000,1692746.0000,1674481.0000,1685011.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,chain 
topology,100,1,13419200,138790.3900,138171.8500,141266.3400,5699.1337,699.5033,13483.1682,"137738.0000,138640.0000,143799.0000,138529.0000,138740.0000,137958.0000,138249.0000,138529.0000,138009.0000,138129.0000,138259.0000,138339.0000,138449.0000,138088.0000,138309.0000,138169.0000,137859.0000,194886.0000,138921.0000,138359.0000,138360.0000,137608.0000,138570.0000,138359.0000,138019.0000,137929.0000,138149.0000,138169.0000,137959.0000,138099.0000,138349.0000,138049.0000,138239.0000,138449.0000,137748.0000,137838.0000,137808.0000,137527.0000,138028.0000,137939.0000,137518.0000,137728.0000,137839.0000,137878.0000,138069.0000,142207.0000,138911.0000,138038.0000,138139.0000,138640.0000,138309.0000,137798.0000,138860.0000,137738.0000,138560.0000,138620.0000,138039.0000,137939.0000,138029.0000,138329.0000,137708.0000,137919.0000,137678.0000,137839.0000,137998.0000,137938.0000,138169.0000,138079.0000,138299.0000,137527.0000,137828.0000,137828.0000,138269.0000,138069.0000,141335.0000,138710.0000,137818.0000,137878.0000,138049.0000,137908.0000,138058.0000,138240.0000,137788.0000,138349.0000,138559.0000,137789.0000,138439.0000,137839.0000,138009.0000,138048.0000,137327.0000,137748.0000,137668.0000,137768.0000,138660.0000,137999.0000,137888.0000,138550.0000,137428.0000,137698.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,18976200,190111.1900,188892.4400,193210.6000,8880.9017,1628.5082,16167.8570,"193554.0000,188524.0000,198633.0000,191630.0000,189476.0000,190327.0000,189816.0000,189726.0000,188995.0000,190007.0000,190308.0000,188625.0000,188525.0000,188584.0000,189035.0000,188815.0000,255522.0000,189396.0000,188975.0000,189476.0000,189737.0000,190107.0000,188615.0000,188134.0000,188314.0000,188344.0000,189265.0000,188524.0000,189026.0000,188564.0000,188465.0000,189055.0000,189005.0000,188775.0000,189336.0000,189226.0000,194456.0000,189136.0000,188214.0000,187833.0000,188765.0000,188404.0000,187983.0000,188454.0000,188524.0000,188284.0000,187823.0000,188775.0000,189927.0000,189005.0000,187873.0000,187703.0000,187372.0000,188204.0000,187823.0000,188314.0000,187732.0000,187953.0000,193574.0000,187994.0000,188254.0000,187583.0000,187112.0000,187322.0000,187172.0000,187563.0000,187933.0000,187563.0000,187853.0000,188154.0000,188234.0000,187202.0000,187894.0000,188254.0000,188434.0000,188334.0000,188775.0000,189015.0000,188945.0000,246815.0000,189105.0000,188324.0000,187973.0000,188224.0000,188855.0000,188264.0000,188484.0000,188264.0000,187663.0000,190398.0000,188735.0000,188765.0000,189286.0000,188375.0000,187673.0000,187011.0000,189887.0000,188925.0000,189687.0000,188244.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,23447800,234356.9300,233053.6500,237269.9500,9499.6888,4981.8160,15421.5358,"232968.0000,232277.0000,244551.0000,233620.0000,233199.0000,233610.0000,231947.0000,233109.0000,232237.0000,232678.0000,232658.0000,232898.0000,232157.0000,233039.0000,279637.0000,233881.0000,233550.0000,233129.0000,233309.0000,232878.0000,234582.0000,233390.0000,233309.0000,233380.0000,233900.0000,233028.0000,233820.0000,233319.0000,233780.0000,232137.0000,233270.0000,240142.0000,233610.0000,234532.0000,231917.0000,230734.0000,232507.0000,231065.0000,232728.0000,230244.0000,232678.0000,231426.0000,232538.0000,230524.0000,231445.0000,231265.0000,230905.0000,231266.0000,288744.0000,233049.0000,230975.0000,232076.0000,232337.0000,231186.0000,230834.0000,232537.0000,232978.0000,232026.0000,231516.0000,232107.0000,231596.0000,231105.0000,231405.0000,232007.0000,231246.0000,237607.0000,231486.0000,232758.0000,232478.0000,231416.0000,230905.0000,232327.0000,231466.0000,231636.0000,230945.0000,231456.0000,232548.0000,232708.0000,232387.0000,233851.0000,232758.0000,232368.0000,238469.0000,233149.0000,233028.0000,232608.0000,233870.0000,232759.0000,231466.0000,232297.0000,232869.0000,232317.0000,231816.0000,231977.0000,230494.0000,233089.0000,233209.0000,233179.0000,232738.0000,292762.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,115364100,1111367.6100,1092809.5600,1136941.8600,109799.8413,80886.3452,184996.3756,"963343.0000,963673.0000,1177369.0000,1151940.0000,1140398.0000,1144958.0000,1141561.0000,1139297.0000,1147983.0000,1142523.0000,1141221.0000,1142663.0000,1209330.0000,1149857.0000,1147392.0000,1187157.0000,1147091.0000,1146971.0000,1152111.0000,1146620.0000,1149756.0000,1146972.0000,1198328.0000,1145067.0000,1144898.0000,1148674.0000,1145829.0000,1145398.0000,1142413.0000,1210692.0000,1143645.0000,1143494.0000,1146611.0000,1144366.0000,1141210.0000,1145669.0000,1152201.0000,1149657.0000,1150788.0000,1202466.0000,1147713.0000,1144406.0000,1146480.0000,1152422.0000,1149506.0000,1149165.0000,1150217.0000,1143936.0000,1139126.0000,1141501.0000,1189703.0000,1142703.0000,1141952.0000,1147182.0000,1144346.0000,1143905.0000,1140108.0000,1151469.0000,1144446.0000,1139978.0000,1209741.0000,1142984.0000,1144166.0000,1140789.0000,1147742.0000,1144357.0000,1145649.0000,1151780.0000,1144998.0000,1142623.0000,1147532.0000,1202105.0000,1142893.0000,1146240.0000,1152412.0000,1145308.0000,1820769.0000,970457.0000,957772.0000,959275.0000,959406.0000,966439.0000,959516.0000,958674.0000,961129.0000,979785.0000,961339.0000,960278.0000,959716.0000,964746.0000,956791.0000,959646.0000,962262.0000,961570.0000,965727.0000,959626.0000,958534.0000,957683.0000,973151.0000,959295.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,40309400,404537.0600,403001.3400,407590.9500,10523.9693,5796.6235,16405.7926,"402310.0000,403772.0000,410265.0000,402620.0000,404283.0000,403292.0000,401779.0000,401468.0000,403222.0000,447485.0000,404995.0000,402350.0000,402390.0000,402641.0000,401879.0000,402500.0000,402600.0000,401328.0000,401949.0000,407990.0000,401528.0000,401598.0000,401388.0000,401318.0000,402601.0000,401738.0000,400456.0000,401378.0000,401137.0000,408932.0000,402400.0000,400697.0000,401057.0000,403743.0000,400757.0000,401478.0000,400276.0000,400156.0000,401679.0000,466341.0000,402690.0000,401898.0000,401928.0000,401858.0000,400927.0000,403522.0000,402610.0000,399775.0000,405736.0000,403051.0000,402760.0000,402961.0000,401027.0000,400997.0000,401348.0000,400467.0000,401598.0000,402260.0000,406838.0000,402259.0000,402009.0000,401227.0000,401177.0000,402200.0000,401999.0000,401769.0000,401418.0000,400657.0000,456793.0000,403001.0000,402179.0000,402310.0000,400927.0000,401789.0000,402981.0000,401909.0000,401328.0000,403112.0000,407209.0000,402871.0000,402841.0000,403181.0000,402560.0000,402700.0000,401719.0000,402189.0000,401157.0000,401388.0000,406527.0000,401889.0000,401287.0000,401137.0000,402059.0000,402129.0000,403171.0000,404083.0000,401708.0000,403121.0000,448788.0000,402921.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,soup 
topology,100,1,177423700,1972176.5900,1961465.0400,1984777.3700,59188.3686,50576.9831,69602.9900,"2037139.0000,2048540.0000,2013413.0000,2017491.0000,1960403.0000,2156205.0000,2055604.0000,1990150.0000,1919516.0000,1944072.0000,1919646.0000,1944964.0000,1912572.0000,1978197.0000,1946918.0000,1938842.0000,1944022.0000,1977556.0000,1885260.0000,1961114.0000,1926949.0000,2039022.0000,1941798.0000,1945986.0000,1881604.0000,2018043.0000,1934254.0000,1985180.0000,1933752.0000,1945545.0000,1969871.0000,1973338.0000,1919466.0000,1948531.0000,1937299.0000,1959672.0000,1963929.0000,2033291.0000,1944813.0000,1913674.0000,1952147.0000,1941387.0000,1920899.0000,1909887.0000,1914476.0000,1890420.0000,1973638.0000,1913324.0000,1926348.0000,1939553.0000,2002573.0000,1916831.0000,1950595.0000,1966044.0000,2000188.0000,1925667.0000,1914466.0000,1965082.0000,1992864.0000,1923974.0000,1955564.0000,1940384.0000,1981643.0000,1997905.0000,1912953.0000,2091652.0000,2119755.0000,2088556.0000,2109005.0000,2070441.0000,2061274.0000,2074780.0000,2062857.0000,2046637.0000,2105959.0000,2018282.0000,1991902.0000,2104305.0000,1937540.0000,1950064.0000,1924786.0000,1975171.0000,1944803.0000,1916430.0000,1913955.0000,1916910.0000,1948160.0000,1943762.0000,1974741.0000,1973598.0000,1949011.0000,1911601.0000,2127520.0000,1940185.0000,1947097.0000,1941227.0000,1949162.0000,1958801.0000,1978708.0000,1952569.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,chain 
topology,100,1,37197600,366650.0100,360290.8200,374064.9500,34952.2290,29950.6317,41019.4593,"321025.0000,348568.0000,426265.0000,403291.0000,378915.0000,405907.0000,403172.0000,410175.0000,378034.0000,374747.0000,379857.0000,373826.0000,406718.0000,467514.0000,432797.0000,377723.0000,376491.0000,406668.0000,380097.0000,375088.0000,378124.0000,406959.0000,404424.0000,465690.0000,431876.0000,349700.0000,349370.0000,402961.0000,348026.0000,437737.0000,438028.0000,348809.0000,348077.0000,433649.0000,403292.0000,437767.0000,437386.0000,347606.0000,347635.0000,347035.0000,346364.0000,349740.0000,348978.0000,348537.0000,405536.0000,435813.0000,434832.0000,347907.0000,347837.0000,350882.0000,346885.0000,347476.0000,348959.0000,345542.0000,348387.0000,350842.0000,350351.0000,344861.0000,350802.0000,345562.0000,349109.0000,348428.0000,348408.0000,344971.0000,349089.0000,347877.0000,349420.0000,350161.0000,345572.0000,349570.0000,346554.0000,347866.0000,351273.0000,347285.0000,349600.0000,345311.0000,348257.0000,353487.0000,341033.0000,351664.0000,348367.0000,349119.0000,347315.0000,345953.0000,348508.0000,348478.0000,350862.0000,345752.0000,346884.0000,348588.0000,348528.0000,347596.0000,350151.0000,347386.0000,347205.0000,346433.0000,349509.0000,317909.0000,289366.0000,289035.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,46740000,483929.7700,477079.7300,491947.9800,37888.7076,32432.3787,46169.4066,"524541.0000,463025.0000,466661.0000,466862.0000,436856.0000,434160.0000,435472.0000,495166.0000,582882.0000,575568.0000,437155.0000,435372.0000,434861.0000,434961.0000,464086.0000,464527.0000,491649.0000,466932.0000,434771.0000,431274.0000,468214.0000,465128.0000,439911.0000,519863.0000,438338.0000,463125.0000,491549.0000,525554.0000,463325.0000,465359.0000,465569.0000,462544.0000,521636.0000,472824.0000,459648.0000,465399.0000,465319.0000,462674.0000,462844.0000,467733.0000,519412.0000,493943.0000,465539.0000,462313.0000,551983.0000,464938.0000,491799.0000,608541.0000,466421.0000,464478.0000,462664.0000,459458.0000,495527.0000,611707.0000,522297.0000,463926.0000,465419.0000,492460.0000,526605.0000,520874.0000,525623.0000,462584.0000,461712.0000,465620.0000,464387.0000,463736.0000,496127.0000,518870.0000,466972.0000,495046.0000,521155.0000,493653.0000,521145.0000,437015.0000,555430.0000,514072.0000,468786.0000,464929.0000,519943.0000,495577.0000,491128.0000,521856.0000,465259.0000,465008.0000,495877.0000,521776.0000,464287.0000,434350.0000,463115.0000,467413.0000,526575.0000,515754.0000,435502.0000,524421.0000,524241.0000,433760.0000,517468.0000,494754.0000,465700.0000,518710.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,46285000,428210.3200,420305.6500,436309.0700,40813.7257,37402.3618,45208.2912,"436734.0000,462824.0000,452835.0000,435263.0000,435062.0000,492912.0000,477302.0000,412840.0000,387081.0000,411206.0000,413901.0000,418911.0000,452996.0000,412318.0000,505105.0000,424933.0000,397982.0000,435693.0000,491198.0000,427387.0000,401017.0000,480658.0000,403122.0000,427708.0000,379868.0000,381340.0000,375199.0000,369127.0000,373165.0000,380488.0000,383033.0000,378464.0000,381962.0000,375739.0000,367623.0000,374327.0000,382151.0000,379937.0000,404844.0000,404293.0000,366481.0000,405716.0000,438889.0000,419813.0000,491489.0000,466050.0000,437557.0000,463636.0000,436644.0000,492991.0000,429321.0000,466361.0000,467954.0000,460160.0000,468375.0000,459838.0000,433919.0000,467453.0000,464377.0000,464949.0000,468425.0000,519902.0000,429942.0000,436835.0000,374216.0000,374507.0000,380328.0000,378044.0000,375679.0000,405175.0000,377793.0000,406999.0000,377703.0000,377973.0000,376751.0000,373214.0000,377492.0000,407249.0000,464588.0000,435333.0000,381120.0000,402390.0000,407289.0000,438639.0000,464707.0000,463806.0000,464708.0000,462664.0000,462844.0000,466782.0000,492731.0000,462864.0000,465920.0000,466391.0000,460019.0000,465710.0000,431404.0000,465840.0000,465049.0000,521486.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,110488400,1192128.7400,1163571.3200,1235959.5600,177831.0035,125529.4236,237707.1005,"1163192.0000,1279352.0000,1921188.0000,1783658.0000,1815378.0000,1832040.0000,1851927.0000,1400130.0000,1260085.0000,1281456.0000,1176728.0000,1208719.0000,1277349.0000,1192748.0000,1174684.0000,1136721.0000,1186246.0000,1162761.0000,1243113.0000,1164484.0000,1192036.0000,1162110.0000,1218657.0000,1300472.0000,1144457.0000,1150117.0000,1164925.0000,1133616.0000,1156339.0000,1190273.0000,1140078.0000,1156480.0000,1162751.0000,1160236.0000,1159776.0000,1249295.0000,1125360.0000,1151029.0000,1174243.0000,1196986.0000,1276948.0000,1174694.0000,1147362.0000,1144576.0000,1200332.0000,1167650.0000,1213607.0000,1155918.0000,1160146.0000,1125881.0000,1159225.0000,1158994.0000,1143565.0000,1131622.0000,1134768.0000,1159936.0000,1163061.0000,1249676.0000,1205803.0000,1228476.0000,1162571.0000,1230128.0000,1152802.0000,1246029.0000,1838732.0000,1034377.0000,1046090.0000,1036672.0000,1035690.0000,1068573.0000,1142343.0000,1107056.0000,1063944.0000,1148003.0000,1041902.0000,1069885.0000,1069896.0000,1030260.0000,1113618.0000,1037163.0000,1032464.0000,1045990.0000,1063593.0000,1059826.0000,1071428.0000,1130951.0000,1061349.0000,1087218.0000,1077099.0000,1162340.0000,1037824.0000,1036762.0000,1054226.0000,1190664.0000,1163061.0000,1133625.0000,1167279.0000,1151761.0000,1173411.0000,1132834.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,jacobi 
topology,100,1,59954100,605392.8800,588927.6200,625271.1600,92478.5605,78988.2128,108147.1377,"551322.0000,550080.0000,876909.0000,800335.0000,766530.0000,747124.0000,824630.0000,771980.0000,743696.0000,751391.0000,879414.0000,796187.0000,769736.0000,802107.0000,807137.0000,548747.0000,551743.0000,553206.0000,551112.0000,555239.0000,547284.0000,550731.0000,551652.0000,610825.0000,520384.0000,551553.0000,521976.0000,611286.0000,549769.0000,554168.0000,519272.0000,553476.0000,549639.0000,550962.0000,641894.0000,550791.0000,547024.0000,554919.0000,552174.0000,547535.0000,517297.0000,671881.0000,692590.0000,550381.0000,553336.0000,551933.0000,524892.0000,523619.0000,637104.0000,552123.0000,609933.0000,550541.0000,550450.0000,556683.0000,551212.0000,582892.0000,549148.0000,784905.0000,757443.0000,663184.0000,697138.0000,694463.0000,668153.0000,670358.0000,664296.0000,667713.0000,666561.0000,732175.0000,662853.0000,548516.0000,551222.0000,552044.0000,553417.0000,552314.0000,551462.0000,549078.0000,554999.0000,609923.0000,559017.0000,556212.0000,531705.0000,530562.0000,531143.0000,590757.0000,527126.0000,615323.0000,525494.0000,533488.0000,555269.0000,534330.0000,602689.0000,605495.0000,530282.0000,533057.0000,531164.0000,527227.0000,543888.0000,556852.0000,547996.0000,556041.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,311316100,3065291.9800,3033327.1800,3088953.1700,139318.7900,107820.3538,170707.8807,"3131684.0000,3094883.0000,2682942.0000,2959386.0000,3144267.0000,3101606.0000,3097799.0000,3161720.0000,3098331.0000,3094964.0000,3094843.0000,3153504.0000,3094693.0000,3099852.0000,3159245.0000,3102048.0000,3092228.0000,3089964.0000,3146982.0000,3099332.0000,3099853.0000,3098290.0000,3140671.0000,3101897.0000,3100965.0000,3153434.0000,3325981.0000,3096718.0000,3130882.0000,3102778.0000,3093010.0000,3094483.0000,3145700.0000,3101637.0000,3098520.0000,3094042.0000,3130451.0000,3098942.0000,3102568.0000,3145760.0000,3087961.0000,3098791.0000,3101206.0000,3144227.0000,3090625.0000,3097278.0000,3100705.0000,3150509.0000,3099683.0000,3096216.0000,3159987.0000,3097428.0000,3148716.0000,3093561.0000,3102589.0000,2875016.0000,2685667.0000,2670590.0000,2674547.0000,2669848.0000,2672212.0000,2691419.0000,2676832.0000,2672763.0000,2679226.0000,2828148.0000,3094292.0000,3101186.0000,3143085.0000,3099692.0000,3100274.0000,3089173.0000,3129009.0000,3102408.0000,3101616.0000,3091908.0000,3146020.0000,3100304.0000,3156069.0000,3096837.0000,3095625.0000,3087821.0000,3147193.0000,3096126.0000,3095876.0000,3158454.0000,3096597.0000,3251922.0000,3102137.0000,3093280.0000,3098310.0000,3149637.0000,3099863.0000,3094052.0000,3143275.0000,3094133.0000,3151561.0000,3104582.0000,3097729.0000,3096547.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,43276800,433794.1800,432058.0200,437150.8600,11936.6487,6842.6054,17650.2270,"430312.0000,430854.0000,436284.0000,432227.0000,431104.0000,431044.0000,431686.0000,430754.0000,430132.0000,430914.0000,430624.0000,484195.0000,432236.0000,431295.0000,430984.0000,430924.0000,430513.0000,430713.0000,430974.0000,430003.0000,433298.0000,431215.0000,431004.0000,430633.0000,430453.0000,430322.0000,430182.0000,429802.0000,430964.0000,496919.0000,431455.0000,430953.0000,430212.0000,431816.0000,430894.0000,430603.0000,431144.0000,430523.0000,434491.0000,431084.0000,431536.0000,431204.0000,430473.0000,431856.0000,431485.0000,430754.0000,430543.0000,431606.0000,435082.0000,430793.0000,430463.0000,431274.0000,430864.0000,430513.0000,431795.0000,430593.0000,430843.0000,488913.0000,432086.0000,430713.0000,430584.0000,431896.0000,430473.0000,431184.0000,432136.0000,430804.0000,431895.0000,431464.0000,431895.0000,431024.0000,430554.0000,431114.0000,430693.0000,430252.0000,430934.0000,467383.0000,432517.0000,430664.0000,430253.0000,430703.0000,430423.0000,430794.0000,430754.0000,430914.0000,431134.0000,431004.0000,431755.0000,430022.0000,430403.0000,431525.0000,430814.0000,430282.0000,430583.0000,430713.0000,486529.0000,432277.0000,430613.0000,430183.0000,430653.0000,430493.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,49458200,495026.1400,493359.4500,498037.6300,11169.6543,6835.1568,16391.9999,"492741.0000,491850.0000,507839.0000,494535.0000,493723.0000,493383.0000,492701.0000,492821.0000,492230.0000,493061.0000,533358.0000,492751.0000,492330.0000,491569.0000,491890.0000,492210.0000,492872.0000,491889.0000,497450.0000,493182.0000,492220.0000,492180.0000,491599.0000,491769.0000,491549.0000,492421.0000,546102.0000,493081.0000,491850.0000,492410.0000,492170.0000,492350.0000,492671.0000,492230.0000,493292.0000,491809.0000,491028.0000,491750.0000,492050.0000,491919.0000,492581.0000,491229.0000,490687.0000,498362.0000,493232.0000,491659.0000,491058.0000,492120.0000,492360.0000,491900.0000,491949.0000,537767.0000,492561.0000,492130.0000,492340.0000,493192.0000,492400.0000,492150.0000,491850.0000,495877.0000,490908.0000,492350.0000,491289.0000,491709.0000,491529.0000,491439.0000,492240.0000,494865.0000,493252.0000,492732.0000,491909.0000,491028.0000,491639.0000,493042.0000,492020.0000,553917.0000,492731.0000,492119.0000,491458.0000,491840.0000,491118.0000,492170.0000,492130.0000,493894.0000,491789.0000,492691.0000,491699.0000,492470.0000,491188.0000,492971.0000,491880.0000,492430.0000,491959.0000,492361.0000,491599.0000,492220.0000,491579.0000,491299.0000,491559.0000,541353.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,53436500,540362.7900,538248.3500,544291.8900,14217.0855,8999.3440,21851.8804,"536103.0000,536324.0000,545762.0000,538367.0000,536955.0000,583063.0000,538137.0000,538167.0000,537435.0000,536934.0000,536153.0000,535402.0000,536755.0000,625102.0000,537205.0000,536113.0000,535923.0000,535913.0000,536835.0000,535763.0000,541724.0000,536895.0000,536073.0000,535372.0000,536434.0000,536615.0000,536865.0000,580988.0000,538137.0000,535562.0000,535482.0000,536745.0000,536163.0000,537176.0000,538578.0000,542806.0000,537636.0000,536174.0000,536624.0000,536614.0000,536494.0000,536684.0000,541433.0000,536504.0000,535873.0000,536494.0000,535913.0000,536845.0000,536003.0000,537085.0000,581640.0000,537616.0000,536975.0000,536805.0000,536845.0000,536744.0000,536654.0000,537346.0000,535342.0000,536314.0000,537315.0000,535713.0000,536825.0000,537746.0000,536575.0000,541293.0000,536184.0000,536564.0000,537386.0000,536955.0000,536023.0000,535362.0000,601297.0000,537216.0000,536694.0000,537415.0000,536925.0000,536344.0000,535713.0000,536524.0000,536915.0000,536374.0000,535563.0000,536113.0000,536254.0000,535512.0000,537025.0000,537416.0000,536434.0000,536654.0000,536895.0000,536815.0000,535201.0000,537266.0000,593252.0000,536203.0000,537186.0000,535692.0000,537616.0000,535141.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,315047100,3168900.1500,3152470.3600,3182568.2200,76282.6129,64184.2780,87879.3255,"3190284.0000,3187408.0000,3041182.0000,3008179.0000,3041883.0000,3183692.0000,3224429.0000,3187058.0000,3183301.0000,3246581.0000,3182440.0000,3182690.0000,3232595.0000,3186447.0000,3186237.0000,3181127.0000,3217737.0000,3048496.0000,3006877.0000,3014712.0000,3183311.0000,3189784.0000,3220261.0000,3185796.0000,3188631.0000,2995365.0000,3005023.0000,3069546.0000,3192258.0000,3185235.0000,3230681.0000,3190124.0000,3182920.0000,3257832.0000,3181228.0000,3183932.0000,3239387.0000,3197849.0000,3186878.0000,3189473.0000,3232174.0000,3192589.0000,3186247.0000,3234568.0000,3184734.0000,3245799.0000,3188530.0000,3231341.0000,3190775.0000,3186437.0000,3235770.0000,3187218.0000,3187209.0000,3239628.0000,3184042.0000,3028618.0000,3007197.0000,2998792.0000,2997729.0000,2999382.0000,3009792.0000,3006496.0000,2994363.0000,3006135.0000,3165257.0000,3187219.0000,3229449.0000,3184704.0000,3183862.0000,3191387.0000,3242052.0000,3186858.0000,3185675.0000,3189603.0000,3235410.0000,3186778.0000,3187720.0000,3184834.0000,3224660.0000,3188581.0000,3191587.0000,3242884.0000,3184885.0000,3186998.0000,3230080.0000,3190415.0000,3227996.0000,3183251.0000,3187750.0000,3243886.0000,3188420.0000,3187329.0000,3184964.0000,3241572.0000,3186397.0000,3323287.0000,3189123.0000,3186938.0000,3191477.0000,3242293.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,91144700,911202.7900,908946.4600,914826.5600,14389.2301,10083.6184,19107.5227,"910353.0000,904982.0000,914871.0000,909311.0000,906996.0000,905453.0000,906666.0000,907036.0000,908319.0000,906586.0000,905453.0000,905945.0000,954556.0000,906144.0000,906615.0000,909011.0000,908789.0000,909060.0000,906275.0000,906065.0000,905373.0000,906095.0000,905925.0000,906545.0000,905684.0000,965618.0000,906696.0000,904351.0000,906164.0000,905022.0000,906165.0000,905173.0000,905754.0000,906356.0000,908138.0000,906245.0000,907156.0000,905023.0000,956470.0000,907437.0000,905743.0000,906785.0000,905713.0000,914361.0000,907237.0000,906856.0000,906906.0000,911906.0000,908309.0000,906335.0000,905884.0000,906566.0000,952783.0000,906876.0000,907528.0000,908750.0000,911455.0000,907547.0000,907678.0000,906696.0000,963814.0000,908610.0000,906085.0000,905794.0000,907407.0000,906636.0000,907828.0000,906566.0000,907137.0000,907287.0000,905483.0000,904482.0000,906005.0000,948866.0000,906786.0000,907577.0000,905683.0000,906254.0000,911965.0000,906785.0000,906986.0000,904391.0000,970597.0000,906826.0000,907147.0000,905965.0000,905744.0000,908780.0000,905032.0000,906375.0000,906836.0000,913308.0000,906445.0000,906866.0000,907388.0000,960407.0000,905514.0000,906215.0000,905113.0000,905504.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,297951700,3012953.5500,2997621.1200,3024139.0300,66238.5673,49922.9202,86726.0612,"2994382.0000,2988922.0000,3119100.0000,3110964.0000,3042444.0000,2948416.0000,2748998.0000,2750050.0000,2824411.0000,3105674.0000,2955229.0000,2815734.0000,2817278.0000,2819452.0000,2845751.0000,2999253.0000,3031333.0000,3073663.0000,3078733.0000,2988081.0000,2991376.0000,3026975.0000,3040881.0000,2992229.0000,2990105.0000,2988332.0000,3050409.0000,3048486.0000,2986739.0000,2995565.0000,3043145.0000,3060978.0000,3005334.0000,2996057.0000,3015263.0000,3027546.0000,2995796.0000,3005725.0000,3002268.0000,3033206.0000,3055068.0000,3013891.0000,3047744.0000,3019942.0000,3020082.0000,3025633.0000,3020603.0000,3012618.0000,3017286.0000,3015062.0000,3024250.0000,3017206.0000,2999283.0000,3039569.0000,3047593.0000,3056180.0000,3079164.0000,3042975.0000,3015393.0000,3050279.0000,2997709.0000,3014611.0000,3074425.0000,3028558.0000,3041914.0000,3021985.0000,2984764.0000,3025411.0000,3025041.0000,3009962.0000,3019681.0000,3021124.0000,3019431.0000,3017537.0000,3008069.0000,3026794.0000,3073413.0000,3026754.0000,3019921.0000,3071640.0000,3048556.0000,3022887.0000,3045891.0000,3017537.0000,3025011.0000,3019881.0000,3042845.0000,3021344.0000,3022256.0000,3017928.0000,3044648.0000,3023018.0000,3132806.0000,3097218.0000,3001567.0000,3020412.0000,3078493.0000,3020713.0000,3016434.0000,3073062.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,53347200,535949.5100,531008.3500,541334.5400,26321.6044,22941.5133,33036.3922,"558806.0000,534290.0000,501217.0000,551252.0000,553487.0000,547194.0000,552013.0000,549999.0000,521826.0000,522448.0000,552214.0000,521475.0000,522607.0000,524000.0000,518711.0000,555821.0000,519832.0000,518991.0000,524702.0000,526595.0000,584825.0000,505014.0000,638848.0000,505615.0000,507619.0000,559498.0000,555010.0000,508060.0000,533869.0000,539380.0000,551332.0000,521295.0000,492631.0000,552385.0000,523840.0000,548346.0000,522999.0000,554048.0000,547825.0000,525203.0000,521756.0000,550772.0000,554949.0000,522438.0000,520484.0000,550731.0000,553787.0000,551242.0000,549158.0000,550891.0000,522527.0000,557053.0000,548036.0000,520925.0000,523620.0000,492852.0000,522638.0000,551052.0000,522708.0000,552304.0000,607108.0000,522097.0000,551623.0000,553477.0000,493062.0000,548116.0000,554428.0000,491258.0000,550721.0000,583964.0000,492530.0000,522808.0000,510796.0000,557814.0000,557243.0000,586228.0000,505185.0000,557324.0000,560259.0000,505004.0000,504673.0000,505195.0000,503371.0000,506967.0000,505675.0000,507078.0000,558256.0000,585206.0000,532225.0000,557093.0000,559658.0000,505245.0000,534610.0000,555309.0000,559488.0000,505786.0000,506157.0000,508241.0000,525724.0000,562904.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,60504300,618978.3600,613893.8900,623762.3000,25280.4543,22146.9763,29913.0365,"582081.0000,576150.0000,634991.0000,613570.0000,606967.0000,579515.0000,600746.0000,635141.0000,696768.0000,608220.0000,666100.0000,613220.0000,584305.0000,613560.0000,637706.0000,612298.0000,607479.0000,560259.0000,608941.0000,583544.0000,614652.0000,612178.0000,583553.0000,605615.0000,580027.0000,609002.0000,612278.0000,610444.0000,587861.0000,611306.0000,555169.0000,564597.0000,585797.0000,587351.0000,580718.0000,587681.0000,634129.0000,604813.0000,611315.0000,610614.0000,636253.0000,636153.0000,612057.0000,639289.0000,637425.0000,637655.0000,639459.0000,579225.0000,638046.0000,636974.0000,641783.0000,638477.0000,638908.0000,609783.0000,637946.0000,609934.0000,607278.0000,612317.0000,606406.0000,639569.0000,640080.0000,610654.0000,638066.0000,607558.0000,639739.0000,635972.0000,641132.0000,635171.0000,584986.0000,636042.0000,639799.0000,637826.0000,609913.0000,637665.0000,609021.0000,641392.0000,634049.0000,641072.0000,637175.0000,638548.0000,637365.0000,640401.0000,639319.0000,637756.0000,609813.0000,636373.0000,553957.0000,638207.0000,638998.0000,636113.0000,640301.0000,670488.0000,609362.0000,609212.0000,637255.0000,607138.0000,640952.0000,637395.0000,639339.0000,636634.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,64601600,648933.3800,643763.0700,654955.5300,28509.8007,23794.0882,36169.5434,"669706.0000,752934.0000,686739.0000,695005.0000,667913.0000,660308.0000,664667.0000,666150.0000,662863.0000,693541.0000,687139.0000,668314.0000,669556.0000,636874.0000,667252.0000,635812.0000,606296.0000,608982.0000,640592.0000,637766.0000,638487.0000,638037.0000,647044.0000,630953.0000,640471.0000,669276.0000,636013.0000,608110.0000,639659.0000,668304.0000,607589.0000,665589.0000,668814.0000,644008.0000,636664.0000,695345.0000,667202.0000,608921.0000,641473.0000,634260.0000,734660.0000,642265.0000,654097.0000,625793.0000,634199.0000,628198.0000,629510.0000,648516.0000,655329.0000,605395.0000,651582.0000,630432.0000,626434.0000,629961.0000,677682.0000,670819.0000,639009.0000,660609.0000,642354.0000,637485.0000,611977.0000,667823.0000,667322.0000,613891.0000,634169.0000,754557.0000,637665.0000,668504.0000,667221.0000,612588.0000,637837.0000,635862.0000,637636.0000,610715.0000,668615.0000,637305.0000,639769.0000,664887.0000,643947.0000,664927.0000,665738.0000,609713.0000,666470.0000,638357.0000,667833.0000,669315.0000,607999.0000,609723.0000,666210.0000,638979.0000,609122.0000,637846.0000,640220.0000,608981.0000,669386.0000,640822.0000,664397.0000,609232.0000,668384.0000,638437.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,325893000,3294001.5500,3281001.8600,3336081.9500,107426.1108,41009.5408,239345.5949,"3297618.0000,3265617.0000,3395283.0000,3335149.0000,3330039.0000,3364244.0000,3284633.0000,3303960.0000,3249436.0000,3288861.0000,3249627.0000,3246711.0000,3291606.0000,3327845.0000,3329429.0000,3257802.0000,3244157.0000,3250339.0000,3372209.0000,3281157.0000,3337484.0000,3247583.0000,3255357.0000,3274253.0000,3228477.0000,3243856.0000,3253905.0000,3226613.0000,3214701.0000,3247914.0000,3268964.0000,3303208.0000,3277821.0000,3264425.0000,3288360.0000,3252492.0000,3309621.0000,3260678.0000,3272620.0000,3302327.0000,3228447.0000,3222686.0000,3353103.0000,3225711.0000,3284412.0000,3337564.0000,4275542.0000,3312136.0000,3254736.0000,3367680.0000,3272160.0000,3246230.0000,3237193.0000,3252753.0000,3276598.0000,3320421.0000,3205193.0000,3275566.0000,3366719.0000,3312746.0000,3269524.0000,3234328.0000,3254987.0000,3390124.0000,3313688.0000,3283140.0000,3322505.0000,3226213.0000,3317115.0000,3215111.0000,3337914.0000,3287609.0000,3344867.0000,3311043.0000,3293820.0000,3270326.0000,3281687.0000,3323857.0000,3330680.0000,3298770.0000,3302067.0000,3270246.0000,3324809.0000,3284493.0000,3284302.0000,3311524.0000,3183903.0000,3317105.0000,3291506.0000,3229860.0000,3257151.0000,3240370.0000,3216273.0000,3299822.0000,3303178.0000,3251400.0000,3338525.0000,3285555.0000,3294312.0000,3282469.0000" -building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,101230000,998603.1300,993389.1700,1003853.6800,26532.3007,22651.4054,32718.9121,"985566.0000,1017496.0000,980937.0000,987650.0000,1016304.0000,1016193.0000,1013217.0000,1016814.0000,989082.0000,988020.0000,1013368.0000,1018868.0000,1013217.0000,1015772.0000,926443.0000,1018408.0000,983502.0000,955488.0000,957041.0000,1051761.0000,1016845.0000,984413.0000,1017095.0000,1015301.0000,1020301.0000,1014200.0000,1015302.0000,1045179.0000,1008639.0000,1012467.0000,1009561.0000,975286.0000,989683.0000,985726.0000,932124.0000,1014831.0000,1016474.0000,1014600.0000,986567.0000,974034.0000,988040.0000,1037052.0000,955929.0000,960949.0000,1056190.0000,990935.0000,991256.0000,991306.0000,984403.0000,1006135.0000,960358.0000,956370.0000,986107.0000,1008870.0000,986216.0000,985746.0000,986016.0000,987518.0000,988290.0000,1014620.0000,1014550.0000,1015822.0000,987558.0000,988591.0000,986477.0000,1043445.0000,959055.0000,984543.0000,989082.0000,987118.0000,1016134.0000,1044176.0000,983581.0000,1012817.0000,1008359.0000,1036953.0000,1009801.0000,1012787.0000,982650.0000,1041101.0000,1011445.0000,1006375.0000,934459.0000,987759.0000,984643.0000,957472.0000,981558.0000,1012126.0000,987610.0000,1010523.0000,984704.0000,984944.0000,984022.0000,1010453.0000,1095233.0000,976758.0000,989432.0000,1015732.0000,987820.0000,1016564.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,soup 
topology,100,1,243152900,2560884.0400,2547804.0900,2587891.6800,92063.4278,48031.6296,150434.6614,"2584235.0000,2528370.0000,2549420.0000,2533350.0000,2537316.0000,2591169.0000,2527518.0000,2530173.0000,2596519.0000,2530764.0000,2572494.0000,3090134.0000,2533429.0000,2578234.0000,2532277.0000,2518401.0000,2576371.0000,2542045.0000,2913600.0000,2743077.0000,2591600.0000,2530714.0000,2533139.0000,2523681.0000,2539480.0000,2590256.0000,2528950.0000,2535513.0000,2532948.0000,2514875.0000,2567794.0000,2516106.0000,2511608.0000,2560590.0000,2518871.0000,2512028.0000,2566061.0000,3132916.0000,2528390.0000,2579396.0000,2527588.0000,2538800.0000,2541425.0000,2529171.0000,2568917.0000,2533550.0000,2527047.0000,2529582.0000,2588133.0000,2525735.0000,2534171.0000,2528781.0000,2526797.0000,2576491.0000,2526056.0000,2532988.0000,2539601.0000,2530343.0000,2571401.0000,2532728.0000,2527088.0000,2570029.0000,2531205.0000,2534852.0000,2537176.0000,2531917.0000,2583875.0000,2533921.0000,2530995.0000,2534241.0000,2597550.0000,2527038.0000,2539240.0000,2528951.0000,2578575.0000,2596018.0000,2528500.0000,2532257.0000,2570359.0000,2522379.0000,2532437.0000,2535894.0000,2529412.0000,2589546.0000,2525614.0000,2528930.0000,2589275.0000,2523942.0000,2532367.0000,2538059.0000,2527187.0000,2570259.0000,2527067.0000,2524423.0000,2534521.0000,2525845.0000,2594876.0000,2532718.0000,2523250.0000,2533499.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,chain 
topology,100,1,36617100,377234.4800,375728.7700,380173.6200,10308.7143,5750.7886,15851.3396,"374537.0000,374797.0000,384386.0000,376931.0000,374337.0000,377172.0000,375488.0000,375619.0000,374347.0000,374817.0000,374467.0000,380238.0000,373625.0000,374317.0000,374176.0000,374086.0000,375158.0000,374537.0000,374788.0000,374717.0000,374276.0000,373626.0000,422387.0000,374016.0000,375248.0000,374477.0000,372984.0000,373825.0000,374277.0000,373896.0000,373234.0000,373465.0000,380017.0000,374357.0000,375409.0000,374277.0000,375388.0000,372443.0000,372553.0000,374066.0000,374747.0000,376250.0000,374787.0000,436024.0000,374768.0000,373846.0000,375028.0000,374637.0000,376501.0000,375269.0000,374397.0000,374787.0000,374427.0000,375089.0000,379707.0000,374216.0000,376070.0000,373925.0000,376851.0000,374597.0000,374647.0000,374567.0000,373706.0000,375479.0000,382071.0000,375028.0000,377853.0000,375148.0000,374748.0000,375068.0000,375229.0000,375208.0000,373756.0000,374817.0000,375248.0000,429051.0000,377673.0000,375539.0000,374377.0000,375609.0000,375068.0000,376019.0000,374547.0000,375419.0000,374897.0000,381189.0000,375859.0000,373225.0000,376040.0000,373976.0000,373816.0000,374216.0000,374607.0000,375228.0000,375399.0000,373736.0000,418490.0000,375538.0000,373555.0000,375118.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,42970100,470129.3100,468454.2400,473356.6900,11413.8874,6153.4252,17317.8395,"468104.0000,473375.0000,479486.0000,466922.0000,468826.0000,467203.0000,466832.0000,472914.0000,468696.0000,468575.0000,468095.0000,466642.0000,466762.0000,467263.0000,467563.0000,466432.0000,523679.0000,466641.0000,465670.0000,465049.0000,467703.0000,466180.0000,467674.0000,465830.0000,476070.0000,466952.0000,466411.0000,469126.0000,467994.0000,468475.0000,467613.0000,466581.0000,468205.0000,524351.0000,469517.0000,468475.0000,466461.0000,466562.0000,466852.0000,465980.0000,466631.0000,472623.0000,465980.0000,467153.0000,466090.0000,467352.0000,466561.0000,467173.0000,467764.0000,467303.0000,473705.0000,468926.0000,466371.0000,468605.0000,467383.0000,468255.0000,466962.0000,465769.0000,515905.0000,468886.0000,467924.0000,466892.0000,467554.0000,466882.0000,465890.0000,465599.0000,468175.0000,472272.0000,466722.0000,466732.0000,466732.0000,467934.0000,468395.0000,468725.0000,469037.0000,472693.0000,466982.0000,466993.0000,466722.0000,464457.0000,465540.0000,465640.0000,467193.0000,464107.0000,534089.0000,468485.0000,468495.0000,468135.0000,467964.0000,468205.0000,468214.0000,467854.0000,474226.0000,466251.0000,467163.0000,467222.0000,465689.0000,467212.0000,465881.0000,468916.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,52231100,510464.8400,505539.6300,514486.2100,22710.0036,16885.1068,29195.4525,"513651.0000,514602.0000,547605.0000,439731.0000,441694.0000,440231.0000,440112.0000,438027.0000,440713.0000,438709.0000,481359.0000,513391.0000,513400.0000,512569.0000,512429.0000,513420.0000,510846.0000,512388.0000,517007.0000,514081.0000,511797.0000,513100.0000,512388.0000,512408.0000,513962.0000,510274.0000,520985.0000,514132.0000,511076.0000,512058.0000,510555.0000,512459.0000,511146.0000,510745.0000,556572.0000,514222.0000,512047.0000,511336.0000,512328.0000,510214.0000,512038.0000,518530.0000,514452.0000,513901.0000,513260.0000,512328.0000,512488.0000,514252.0000,511256.0000,569687.0000,513180.0000,514042.0000,512488.0000,511617.0000,513300.0000,511116.0000,513040.0000,521025.0000,513090.0000,511326.0000,513110.0000,512399.0000,510294.0000,513421.0000,512028.0000,518520.0000,512428.0000,514423.0000,513009.0000,512108.0000,513841.0000,514773.0000,563846.0000,514944.0000,513951.0000,515284.0000,513551.0000,512849.0000,515744.0000,514473.0000,521476.0000,513701.0000,513570.0000,514863.0000,514242.0000,512488.0000,512979.0000,512258.0000,520383.0000,512599.0000,513591.0000,513631.0000,512288.0000,510415.0000,511928.0000,512458.0000,579536.0000,515775.0000,512519.0000,514603.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,235850400,2301567.6700,2270374.6800,2327031.5500,143440.0701,120949.1217,162505.4066,"2011960.0000,2016179.0000,2413232.0000,2375691.0000,2429953.0000,2369278.0000,2362745.0000,2415596.0000,2358748.0000,2356384.0000,2352958.0000,2359449.0000,2409053.0000,2367726.0000,2362886.0000,2349391.0000,2417199.0000,2357576.0000,2362997.0000,2364830.0000,2361944.0000,2420005.0000,2355903.0000,2360071.0000,2369088.0000,2362425.0000,2402010.0000,2358678.0000,2366994.0000,2362616.0000,2363698.0000,2434652.0000,2362175.0000,2358718.0000,2356834.0000,2349461.0000,2412601.0000,2357015.0000,2361493.0000,2393484.0000,2359850.0000,2370361.0000,2357416.0000,2413612.0000,2362895.0000,2358187.0000,2417049.0000,2351114.0000,2361093.0000,2356273.0000,2364439.0000,2406509.0000,2353989.0000,2356544.0000,2345182.0000,2354430.0000,2395047.0000,2352376.0000,2352757.0000,2353579.0000,2362635.0000,2415406.0000,2359780.0000,2361764.0000,2362566.0000,2142378.0000,2007563.0000,2025907.0000,2013114.0000,2014085.0000,2009316.0000,2025296.0000,2007453.0000,2017802.0000,2010077.0000,2009236.0000,2012983.0000,2023032.0000,2010748.0000,2013945.0000,2012422.0000,2328040.0000,2431487.0000,2362485.0000,2364369.0000,2360522.0000,2373186.0000,2437117.0000,2358448.0000,2370019.0000,2364028.0000,2367765.0000,2420675.0000,2352286.0000,2360220.0000,2358146.0000,2424032.0000,2253890.0000,2003255.0000,2022891.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,91078900,917486.1600,911639.7900,931401.6900,43100.7080,18351.1003,82513.9281,"915924.0000,906225.0000,920401.0000,915662.0000,910443.0000,963454.0000,908549.0000,904913.0000,906836.0000,911275.0000,905944.0000,902318.0000,907377.0000,909411.0000,903039.0000,903650.0000,903449.0000,903830.0000,960047.0000,904813.0000,905974.0000,905835.0000,912146.0000,905143.0000,907838.0000,903009.0000,901105.0000,910623.0000,907177.0000,904351.0000,906405.0000,957462.0000,907187.0000,904171.0000,903620.0000,907618.0000,906115.0000,905503.0000,905303.0000,907948.0000,911846.0000,902177.0000,905794.0000,905704.0000,970857.0000,905714.0000,902167.0000,904281.0000,904742.0000,906806.0000,903069.0000,904572.0000,905073.0000,908229.0000,905614.0000,905564.0000,904792.0000,963474.0000,906195.0000,904842.0000,903250.0000,906415.0000,911726.0000,906475.0000,908169.0000,907708.0000,962682.0000,909020.0000,908108.0000,908089.0000,915653.0000,908760.0000,905884.0000,905273.0000,905924.0000,914200.0000,906285.0000,908128.0000,908119.0000,978862.0000,906054.0000,899453.0000,1126172.0000,1252551.0000,931904.0000,904431.0000,903038.0000,911866.0000,908589.0000,906405.0000,907517.0000,905623.0000,966800.0000,906897.0000,908910.0000,906405.0000,912918.0000,906997.0000,906625.0000,907126.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,soup 
topology,100,1,185383900,1902718.9300,1867395.5500,1933681.8300,168622.0386,145356.6157,191435.2437,"1951847.0000,1941377.0000,1920968.0000,1922842.0000,2000048.0000,1969801.0000,2036267.0000,2008354.0000,1938441.0000,1971393.0000,1939473.0000,1957437.0000,1931879.0000,1954673.0000,1972336.0000,1947058.0000,1970762.0000,2032019.0000,1940826.0000,1953390.0000,2032941.0000,1939644.0000,2058620.0000,1954422.0000,1940956.0000,2033352.0000,1969471.0000,2050173.0000,1931538.0000,2067276.0000,1934364.0000,2065973.0000,1957217.0000,1925927.0000,1942549.0000,1941287.0000,2043370.0000,2052738.0000,1842730.0000,1629165.0000,1610409.0000,1628834.0000,1691042.0000,2033892.0000,1970793.0000,1948551.0000,2027591.0000,2033652.0000,2034994.0000,1947448.0000,1909927.0000,1946186.0000,2005730.0000,1939123.0000,1950875.0000,1941637.0000,1921058.0000,2003144.0000,2054391.0000,1961986.0000,1942199.0000,1989328.0000,2103695.0000,2144111.0000,2228451.0000,2107202.0000,2240594.0000,2010608.0000,2082625.0000,1962678.0000,1920287.0000,1939974.0000,1998635.0000,1910889.0000,1931929.0000,1928533.0000,1865684.0000,1558832.0000,1601173.0000,1560555.0000,1567248.0000,1595141.0000,1542562.0000,1540196.0000,1568069.0000,1551017.0000,1648872.0000,1623545.0000,1622904.0000,1570915.0000,1696883.0000,1550346.0000,1647510.0000,1598607.0000,1965803.0000,1949222.0000,1974209.0000,1970933.0000,1988096.0000,1935666.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,chain 
topology,100,1,56066100,638199.8600,621765.8400,660732.4900,97317.0042,75269.3123,121669.1372,"579385.0000,583222.0000,869305.0000,904822.0000,901756.0000,874074.0000,956730.0000,898660.0000,958153.0000,874124.0000,870988.0000,980796.0000,609061.0000,579005.0000,608951.0000,698381.0000,638287.0000,607579.0000,611266.0000,669466.0000,666019.0000,608380.0000,608411.0000,638137.0000,667002.0000,580528.0000,607549.0000,639770.0000,666881.0000,639569.0000,585497.0000,577732.0000,666832.0000,584004.0000,608139.0000,579576.0000,581559.0000,694062.0000,578614.0000,642535.0000,609742.0000,582331.0000,578924.0000,581459.0000,578103.0000,608100.0000,678413.0000,579997.0000,613560.0000,638597.0000,637625.0000,585777.0000,584055.0000,587851.0000,611366.0000,664176.0000,686017.0000,584084.0000,609061.0000,581569.0000,694133.0000,580677.0000,582611.0000,579185.0000,581068.0000,608200.0000,579155.0000,642635.0000,666049.0000,582251.0000,576330.0000,581369.0000,698421.0000,580648.0000,577672.0000,668925.0000,581148.0000,609483.0000,581580.0000,578724.0000,668274.0000,579686.0000,581529.0000,580879.0000,609743.0000,578664.0000,580578.0000,580307.0000,610735.0000,581299.0000,580548.0000,580548.0000,577722.0000,581940.0000,582722.0000,582531.0000,606477.0000,582631.0000,578263.0000,609032.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,85090000,894178.7900,872639.6300,917966.8500,115224.6385,103039.5958,129435.3580,"745972.0000,788602.0000,1076668.0000,1101886.0000,1103409.0000,1108559.0000,1146320.0000,1101585.0000,1188270.0000,1134658.0000,1125731.0000,794603.0000,787349.0000,839498.0000,852864.0000,825151.0000,794453.0000,813840.0000,812327.0000,936272.0000,828106.0000,807728.0000,782269.0000,751902.0000,796507.0000,809151.0000,802979.0000,843667.0000,803951.0000,818158.0000,778693.0000,810503.0000,872551.0000,834499.0000,834809.0000,766961.0000,770568.0000,775036.0000,849097.0000,987850.0000,902547.0000,982049.0000,959847.0000,1044858.0000,958374.0000,1020221.0000,954476.0000,1047613.0000,982529.0000,923297.0000,803450.0000,928778.0000,753897.0000,783442.0000,785436.0000,845500.0000,845660.0000,892369.0000,783232.0000,780076.0000,879113.0000,1132894.0000,1046411.0000,953555.0000,958474.0000,1049867.0000,930982.0000,937134.0000,980867.0000,1045719.0000,930791.0000,777361.0000,789353.0000,778463.0000,786869.0000,814621.0000,836162.0000,843256.0000,819471.0000,786298.0000,935521.0000,949016.0000,1051009.0000,1062201.0000,1048615.0000,1034858.0000,960999.0000,989162.0000,848355.0000,900454.0000,868624.0000,786328.0000,781088.0000,786888.0000,781899.0000,838607.0000,848395.0000,924179.0000,784724.0000,778643.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,84380500,867071.1700,851607.4300,885560.5000,86011.5429,73397.0984,101868.1431,"783242.0000,780888.0000,959105.0000,962872.0000,1047372.0000,1010582.0000,1027835.0000,1045959.0000,1104090.0000,1020261.0000,981227.0000,1018999.0000,857583.0000,819660.0000,869826.0000,831093.0000,783873.0000,837935.0000,820192.0000,822025.0000,840000.0000,960388.0000,810413.0000,808339.0000,846852.0000,814711.0000,840780.0000,869165.0000,806366.0000,880516.0000,812928.0000,776990.0000,846001.0000,867491.0000,784374.0000,778733.0000,762442.0000,844288.0000,865678.0000,872340.0000,808099.0000,859637.0000,788151.0000,841613.0000,839138.0000,971429.0000,813519.0000,818649.0000,866460.0000,866740.0000,838226.0000,817687.0000,902067.0000,984414.0000,985084.0000,1055468.0000,1104621.0000,979964.0000,986587.0000,989452.0000,1144997.0000,917416.0000,837555.0000,839047.0000,937044.0000,868704.0000,777301.0000,788792.0000,841342.0000,842023.0000,843476.0000,810884.0000,780357.0000,786899.0000,904141.0000,861780.0000,790185.0000,783552.0000,781629.0000,870126.0000,813749.0000,841722.0000,839929.0000,871929.0000,810092.0000,846341.0000,826022.0000,906565.0000,775056.0000,769215.0000,753716.0000,911755.0000,850379.0000,798761.0000,784804.0000,818849.0000,773192.0000,830601.0000,849166.0000,837605.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,237737100,2310273.6400,2280443.6200,2336530.8300,142832.9298,126558.0403,157171.8820,"2029393.0000,2119214.0000,2396309.0000,2324543.0000,2451464.0000,2374338.0000,2431195.0000,2365501.0000,2316128.0000,2407170.0000,2409895.0000,2433270.0000,2380168.0000,2432779.0000,2458377.0000,2366974.0000,2419613.0000,2296460.0000,2261033.0000,2365772.0000,2407261.0000,2386962.0000,2371753.0000,2386641.0000,2464088.0000,2126498.0000,2060904.0000,2073408.0000,2061335.0000,2046146.0000,2144342.0000,2056455.0000,2075191.0000,2037930.0000,2108123.0000,2055804.0000,2053219.0000,2066955.0000,2084298.0000,2055553.0000,2211048.0000,2260683.0000,2316739.0000,2383946.0000,2350894.0000,2367635.0000,2347107.0000,2426276.0000,2447536.0000,2333730.0000,2345012.0000,2348739.0000,2414253.0000,2348889.0000,2378286.0000,2360261.0000,2382843.0000,2406579.0000,2351033.0000,2352185.0000,2438119.0000,2383054.0000,2376202.0000,2474798.0000,2400888.0000,2442136.0000,2450000.0000,2398243.0000,2372625.0000,2393795.0000,2487643.0000,2353609.0000,2377254.0000,2442387.0000,2359160.0000,2437308.0000,2458176.0000,2348709.0000,2488835.0000,2370791.0000,2389386.0000,2364069.0000,2429772.0000,2352746.0000,2439581.0000,2421738.0000,2370471.0000,2392793.0000,2480409.0000,2354891.0000,2412330.0000,2400447.0000,2060764.0000,2066454.0000,2080661.0000,2062927.0000,2069680.0000,2120186.0000,2137469.0000,2070722.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,jacobi 
topology,100,1,112747300,1188635.1700,1164230.3100,1229556.9500,158475.1326,104118.8959,228020.8965,"1113979.0000,1076187.0000,1849323.0000,1799889.0000,1772457.0000,1738653.0000,2005589.0000,1136340.0000,1188319.0000,1131542.0000,1222003.0000,1131682.0000,1156579.0000,1252481.0000,1160406.0000,1099421.0000,1103309.0000,1190664.0000,1097938.0000,1195383.0000,1277448.0000,1101805.0000,1099912.0000,1195033.0000,1186216.0000,1101415.0000,1191165.0000,1129789.0000,1159646.0000,1159836.0000,1132043.0000,1256899.0000,1129548.0000,1132373.0000,1101915.0000,1193720.0000,1099602.0000,1189432.0000,1281687.0000,1129168.0000,1099932.0000,1129057.0000,1133055.0000,1135129.0000,1220350.0000,1245087.0000,1106905.0000,1107045.0000,1097697.0000,1189832.0000,1216824.0000,1192648.0000,1163022.0000,1132424.0000,1133917.0000,1098860.0000,1280083.0000,1102637.0000,1193259.0000,1161278.0000,1099120.0000,1135188.0000,1100754.0000,1277609.0000,1104361.0000,1188680.0000,1102307.0000,1162009.0000,1132804.0000,1101676.0000,1252100.0000,1127875.0000,1191335.0000,1101655.0000,1161879.0000,1132143.0000,1193850.0000,1274954.0000,1101705.0000,1101706.0000,1105072.0000,1188690.0000,1105773.0000,1193710.0000,1274463.0000,1103609.0000,1102326.0000,1103829.0000,1188570.0000,1221122.0000,1191004.0000,1161179.0000,1108238.0000,1155939.0000,1194231.0000,1187869.0000,1098910.0000,1103399.0000,1159866.0000,1062171.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,355902100,3497125.0900,3460497.7600,3526126.1900,165274.5684,129532.8556,204090.3933,"3565827.0000,3530870.0000,3548473.0000,3156200.0000,3521863.0000,3534256.0000,3531241.0000,3583380.0000,3518777.0000,3565956.0000,3526482.0000,3530579.0000,3526762.0000,3591595.0000,3198340.0000,3070017.0000,3049708.0000,3053826.0000,3045109.0000,3066019.0000,3056971.0000,3048777.0000,3056030.0000,3064236.0000,3125561.0000,3528935.0000,3522545.0000,3582007.0000,3532764.0000,3586165.0000,3533395.0000,3528886.0000,3577248.0000,3517294.0000,3508107.0000,3517494.0000,3549175.0000,3728013.0000,3767558.0000,3568271.0000,3520350.0000,3583310.0000,3596885.0000,3528766.0000,3528536.0000,3530149.0000,3579643.0000,3525239.0000,3525830.0000,3576817.0000,3542362.0000,3531842.0000,3594471.0000,3526993.0000,3533164.0000,3526963.0000,3578070.0000,3532022.0000,3587117.0000,3532083.0000,3574362.0000,3526402.0000,3514609.0000,3514750.0000,3550798.0000,3515381.0000,3518687.0000,4024796.0000,3528595.0000,3530339.0000,3587347.0000,3523396.0000,3519879.0000,3533715.0000,3574142.0000,3532703.0000,3524468.0000,3564684.0000,3531702.0000,3583450.0000,3523616.0000,3533285.0000,3568361.0000,3532843.0000,3587407.0000,3530810.0000,3530941.0000,3573591.0000,3531592.0000,3528696.0000,3531601.0000,3583220.0000,3533004.0000,3576547.0000,3530229.0000,3528476.0000,3572458.0000,3519559.0000,3512435.0000,3514309.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,67338500,673032.9500,671062.0600,676278.3700,12582.2372,8569.4919,17401.4347,"668104.0000,671259.0000,680537.0000,675267.0000,670639.0000,670197.0000,670038.0000,670197.0000,669787.0000,709262.0000,669175.0000,668224.0000,668324.0000,667613.0000,668334.0000,674806.0000,669957.0000,670599.0000,670337.0000,669055.0000,668575.0000,713018.0000,670868.0000,670558.0000,669256.0000,669967.0000,669326.0000,677551.0000,671900.0000,669216.0000,669987.0000,669416.0000,669506.0000,671099.0000,670438.0000,670267.0000,668064.0000,668925.0000,669226.0000,734449.0000,670639.0000,669236.0000,668775.0000,667412.0000,668635.0000,672832.0000,668163.0000,668214.0000,670558.0000,668093.0000,669416.0000,721044.0000,669256.0000,668665.0000,670258.0000,668965.0000,673364.0000,668544.0000,668244.0000,667613.0000,668324.0000,667743.0000,669426.0000,669787.0000,668565.0000,667011.0000,668455.0000,669436.0000,712417.0000,670037.0000,668805.0000,669105.0000,667642.0000,667823.0000,676369.0000,667883.0000,669266.0000,669546.0000,668344.0000,668604.0000,717437.0000,669816.0000,670107.0000,667653.0000,668134.0000,668344.0000,670819.0000,668163.0000,669025.0000,668304.0000,668514.0000,668804.0000,673884.0000,668755.0000,668344.0000,669837.0000,668675.0000,666781.0000,715804.0000,670258.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,76493600,743359.5000,736866.1500,750169.3800,33838.1024,31273.3850,38696.6286,"769196.0000,769936.0000,720372.0000,710784.0000,709712.0000,709832.0000,716826.0000,709753.0000,709552.0000,707047.0000,708209.0000,710744.0000,810573.0000,769386.0000,771339.0000,769125.0000,771670.0000,771520.0000,771169.0000,771810.0000,770578.0000,771068.0000,776629.0000,769345.0000,771289.0000,770538.0000,772602.0000,836453.0000,772170.0000,769615.0000,769014.0000,768964.0000,768875.0000,774084.0000,768494.0000,769135.0000,769535.0000,770969.0000,775197.0000,770457.0000,769716.0000,770928.0000,771159.0000,826153.0000,768023.0000,768604.0000,770547.0000,768795.0000,774796.0000,769626.0000,769976.0000,770247.0000,768343.0000,770287.0000,712237.0000,709893.0000,709131.0000,710314.0000,708240.0000,710924.0000,709663.0000,709482.0000,708660.0000,708530.0000,714491.0000,712087.0000,710153.0000,709783.0000,707759.0000,707678.0000,723359.0000,708840.0000,707809.0000,708951.0000,708560.0000,708590.0000,714000.0000,709482.0000,708470.0000,706086.0000,707047.0000,726965.0000,710824.0000,708981.0000,708901.0000,710113.0000,709342.0000,716896.0000,708721.0000,709021.0000,709232.0000,710023.0000,708470.0000,771449.0000,770929.0000,770357.0000,770939.0000,772431.0000,820583.0000,770788.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,81998400,790771.3700,782520.5800,799160.8500,42404.7675,38402.9874,48682.1418,"815974.0000,814421.0000,826524.0000,826454.0000,814671.0000,813219.0000,814491.0000,814651.0000,878943.0000,815363.0000,813800.0000,816104.0000,815864.0000,820652.0000,813659.0000,813449.0000,812568.0000,810984.0000,863935.0000,814802.0000,816855.0000,813298.0000,812858.0000,819180.0000,812608.0000,812006.0000,811586.0000,816876.0000,816194.0000,817126.0000,812778.0000,815383.0000,884253.0000,817006.0000,815834.0000,815804.0000,816444.0000,820021.0000,816254.0000,814480.0000,814190.0000,814741.0000,799924.0000,740481.0000,742514.0000,743747.0000,740772.0000,755689.0000,743066.0000,739499.0000,741803.0000,740281.0000,740310.0000,746302.0000,740671.0000,738788.0000,739599.0000,740481.0000,758224.0000,739719.0000,740310.0000,741062.0000,741002.0000,739118.0000,741713.0000,741372.0000,739950.0000,740441.0000,740751.0000,743737.0000,741703.0000,739369.0000,739699.0000,738858.0000,754978.0000,740251.0000,740260.0000,898110.0000,909160.0000,810754.0000,742865.0000,740912.0000,738767.0000,741753.0000,740260.0000,818529.0000,815142.0000,814692.0000,814260.0000,813740.0000,866059.0000,815874.0000,816004.0000,815723.0000,812758.0000,816194.0000,816385.0000,813880.0000,813509.0000,819060.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,435511200,4417184.7100,4394154.0400,4441252.6300,119845.7131,86978.3853,170141.1657,"4404335.0000,4461474.0000,4489367.0000,5024732.0000,4511960.0000,4457587.0000,4390930.0000,4433460.0000,4400098.0000,4444131.0000,4408424.0000,4413143.0000,4468718.0000,4175442.0000,4253520.0000,4412632.0000,4445203.0000,4412812.0000,4413844.0000,4450874.0000,4461404.0000,4411129.0000,4415176.0000,4478476.0000,4413513.0000,4408834.0000,4467606.0000,4417320.0000,4475781.0000,4414876.0000,4473648.0000,4415036.0000,4457106.0000,4403735.0000,4465662.0000,4407382.0000,4403304.0000,4448659.0000,4412953.0000,4439903.0000,4413834.0000,4458869.0000,4404766.0000,4412742.0000,4440214.0000,4405528.0000,4451545.0000,4449761.0000,4408414.0000,4413654.0000,4476823.0000,4415618.0000,4455302.0000,4411239.0000,4420136.0000,4477465.0000,4415848.0000,4471402.0000,4414686.0000,4412100.0000,4458638.0000,4405708.0000,4408734.0000,4443750.0000,4408464.0000,4414185.0000,4464069.0000,4414344.0000,4473156.0000,4415847.0000,4414295.0000,4457707.0000,4390590.0000,4182775.0000,4062618.0000,4064672.0000,4059862.0000,4057579.0000,4057107.0000,4419374.0000,4414115.0000,4453459.0000,4416729.0000,4417221.0000,4847175.0000,4412381.0000,4413362.0000,4560331.0000,4478657.0000,4410588.0000,4403795.0000,4462466.0000,4412552.0000,4438661.0000,4392504.0000,4408473.0000,4468536.0000,4397983.0000,4440785.0000,4401089.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,144367100,1425365.3900,1419028.2200,1439986.6400,45998.3443,20039.3065,83195.4885,"1410641.0000,1410320.0000,1422293.0000,1412054.0000,1459374.0000,1416141.0000,1414709.0000,1413166.0000,1408968.0000,1413126.0000,1414298.0000,1408758.0000,1475574.0000,1417725.0000,1411733.0000,1414408.0000,1414097.0000,1413216.0000,1464152.0000,1412565.0000,1409389.0000,1411943.0000,1408306.0000,1408387.0000,1478610.0000,1411102.0000,1405511.0000,1411002.0000,1408126.0000,1462249.0000,1408768.0000,1408457.0000,1413016.0000,1409288.0000,1407264.0000,1418005.0000,1412384.0000,1412575.0000,1452270.0000,1410501.0000,1411342.0000,1432642.0000,1412715.0000,1410270.0000,1407465.0000,1406453.0000,1467649.0000,1412975.0000,1404158.0000,1409409.0000,1407425.0000,1406854.0000,1412876.0000,1409399.0000,1407335.0000,1462779.0000,1411572.0000,1407305.0000,1414037.0000,1410811.0000,1416202.0000,1409990.0000,1409008.0000,1451999.0000,1409859.0000,1409409.0000,1416282.0000,1407575.0000,1409759.0000,1462239.0000,1411713.0000,1410321.0000,1417895.0000,1410621.0000,1409769.0000,1448422.0000,1409108.0000,1414779.0000,1409048.0000,1409729.0000,1407805.0000,1408698.0000,1409940.0000,1476416.0000,1408587.0000,1414008.0000,1449594.0000,1410781.0000,1410089.0000,1414478.0000,1411433.0000,1472338.0000,1441409.0000,1762167.0000,1666687.0000,1411102.0000,1409829.0000,1455205.0000,1406584.0000,1415720.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,311542400,2977127.8800,2942220.9200,3006658.5200,163293.5131,138640.7937,188526.1249,"2634000.0000,2663055.0000,3189743.0000,3169155.0000,3108640.0000,3018538.0000,3018809.0000,3020091.0000,3022195.0000,3025422.0000,3011636.0000,3053005.0000,3046121.0000,3015323.0000,3015052.0000,3024510.0000,3080236.0000,3022366.0000,3015083.0000,3106235.0000,3021084.0000,3083161.0000,3010784.0000,3028336.0000,3110132.0000,3043235.0000,3041823.0000,2994534.0000,3010724.0000,3029319.0000,3011495.0000,3022225.0000,3050239.0000,3049488.0000,3025162.0000,2983884.0000,3072030.0000,3059937.0000,3038446.0000,3022336.0000,3027236.0000,3039098.0000,3017887.0000,3050730.0000,3020282.0000,3053245.0000,3049758.0000,3046842.0000,3046472.0000,3051912.0000,3080576.0000,3016394.0000,3014581.0000,3049758.0000,3048576.0000,3014742.0000,3086548.0000,3015754.0000,2905435.0000,2665720.0000,2711377.0000,2726005.0000,2659929.0000,2622759.0000,2635793.0000,2642907.0000,2668486.0000,2643017.0000,2643488.0000,3067702.0000,3036072.0000,3077220.0000,3030071.0000,3138787.0000,3032606.0000,3141973.0000,3254587.0000,3032706.0000,3030863.0000,3044338.0000,3074425.0000,3032766.0000,3059487.0000,3393790.0000,3042104.0000,3100144.0000,3047654.0000,3014561.0000,3110925.0000,3026624.0000,3038947.0000,3112457.0000,2800005.0000,2632206.0000,2665509.0000,2632287.0000,2635242.0000,2821205.0000,2741444.0000,2621185.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,59215700,606423.4300,602505.4300,611859.2000,23381.6253,17512.7174,33403.2823,"610064.0000,636884.0000,619621.0000,612177.0000,612277.0000,587671.0000,581770.0000,614132.0000,725843.0000,611256.0000,612118.0000,607890.0000,666731.0000,704944.0000,657995.0000,642265.0000,609673.0000,606006.0000,607299.0000,611276.0000,611777.0000,583463.0000,584305.0000,610294.0000,615173.0000,583453.0000,613220.0000,581940.0000,590456.0000,583023.0000,586008.0000,584886.0000,585026.0000,611967.0000,587441.0000,583323.0000,585166.0000,584375.0000,584715.0000,613350.0000,628127.0000,597760.0000,583834.0000,586549.0000,584575.0000,585116.0000,584465.0000,584004.0000,616265.0000,610644.0000,585126.0000,584345.0000,613750.0000,583904.0000,589935.0000,581168.0000,612267.0000,588142.0000,610895.0000,586468.0000,664176.0000,583984.0000,584755.0000,587270.0000,584185.0000,583293.0000,613130.0000,609973.0000,607689.0000,612208.0000,610204.0000,609242.0000,610744.0000,611987.0000,604623.0000,608210.0000,610695.0000,608060.0000,612048.0000,608190.0000,606927.0000,611627.0000,610845.0000,611927.0000,605234.0000,611486.0000,608291.0000,608040.0000,610735.0000,640321.0000,609904.0000,609222.0000,607249.0000,610675.0000,609172.0000,607789.0000,612277.0000,609422.0000,609101.0000,608841.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,82786200,812109.5800,804676.3000,820082.9300,39281.3620,35661.1038,43689.5713,"875877.0000,811175.0000,773583.0000,757263.0000,780808.0000,781839.0000,844629.0000,842534.0000,840189.0000,781879.0000,786699.0000,783432.0000,790696.0000,795314.0000,829269.0000,770588.0000,747584.0000,759055.0000,798931.0000,802469.0000,776820.0000,763284.0000,778253.0000,750490.0000,812648.0000,860408.0000,844098.0000,756050.0000,871408.0000,873773.0000,868263.0000,811756.0000,780938.0000,866059.0000,787861.0000,789474.0000,842134.0000,805654.0000,876067.0000,787369.0000,778763.0000,780747.0000,873603.0000,813309.0000,780095.0000,843536.0000,782881.0000,784314.0000,787179.0000,780065.0000,834048.0000,847684.0000,788401.0000,775086.0000,814932.0000,790134.0000,844668.0000,842174.0000,843977.0000,777430.0000,776469.0000,848195.0000,841352.0000,906385.0000,866290.0000,867381.0000,789373.0000,836183.0000,850279.0000,830622.0000,785627.0000,869916.0000,883632.0000,895514.0000,817927.0000,778773.0000,776439.0000,779384.0000,788051.0000,788061.0000,873072.0000,895654.0000,820312.0000,769927.0000,781399.0000,785245.0000,852383.0000,849918.0000,777801.0000,815402.0000,781188.0000,776990.0000,808660.0000,780887.0000,870037.0000,844358.0000,779515.0000,759247.0000,778192.0000,815272.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,83024700,781869.1100,774433.4600,790309.1000,40428.1960,34253.6199,47649.9653,"781569.0000,769606.0000,805204.0000,739369.0000,750319.0000,723017.0000,768914.0000,731533.0000,719651.0000,759377.0000,705905.0000,752063.0000,731634.0000,739960.0000,751552.0000,733247.0000,759907.0000,727746.0000,730842.0000,749117.0000,830120.0000,740060.0000,714952.0000,726745.0000,757483.0000,732124.0000,750349.0000,730752.0000,742475.0000,746382.0000,728979.0000,751592.0000,725812.0000,732906.0000,877901.0000,810433.0000,871599.0000,870688.0000,794243.0000,785326.0000,786288.0000,783051.0000,868383.0000,902879.0000,781909.0000,875376.0000,777341.0000,783994.0000,841051.0000,873022.0000,809672.0000,874595.0000,813599.0000,780506.0000,780226.0000,787339.0000,783683.0000,784985.0000,783893.0000,806235.0000,792579.0000,784074.0000,785306.0000,773514.0000,785085.0000,787560.0000,781679.0000,785877.0000,782099.0000,786918.0000,777110.0000,782691.0000,818789.0000,781929.0000,781809.0000,788742.0000,782590.0000,784955.0000,785877.0000,782851.0000,776078.0000,787099.0000,809331.0000,849458.0000,777601.0000,784484.0000,781769.0000,781809.0000,883762.0000,776509.0000,779154.0000,783862.0000,781308.0000,792309.0000,779204.0000,790636.0000,776739.0000,780958.0000,790035.0000,783292.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,344466000,3343372.6200,3321241.0100,3360878.9000,99925.8059,78373.6962,124357.7029,"3333325.0000,3339547.0000,3345438.0000,3371568.0000,3333496.0000,3335599.0000,3368652.0000,3367610.0000,3373361.0000,3076158.0000,3051983.0000,3078182.0000,3018900.0000,3087710.0000,3060158.0000,3068423.0000,3068123.0000,3087089.0000,3371609.0000,3383832.0000,3335459.0000,3325099.0000,3447953.0000,3372109.0000,3335129.0000,3416884.0000,3376738.0000,3382048.0000,3380525.0000,3427434.0000,3358694.0000,3328987.0000,3371128.0000,3520851.0000,3379653.0000,3404981.0000,3337463.0000,3314620.0000,3360067.0000,3365366.0000,3416132.0000,3380856.0000,3339517.0000,3353363.0000,3378953.0000,3396686.0000,3410512.0000,3287268.0000,3377819.0000,3455758.0000,3363443.0000,3329758.0000,3318798.0000,3369384.0000,3376177.0000,3332574.0000,3265888.0000,3383932.0000,3383561.0000,3307707.0000,3315622.0000,3421854.0000,3338775.0000,3349065.0000,3463322.0000,3391987.0000,3372590.0000,3416142.0000,3342323.0000,3386166.0000,3385926.0000,3332975.0000,3303960.0000,3408748.0000,3318858.0000,3374763.0000,3637973.0000,3346791.0000,3339127.0000,3335680.0000,3337935.0000,3331102.0000,3398199.0000,3339167.0000,3392147.0000,3370937.0000,3414400.0000,3340980.0000,3309821.0000,3347492.0000,3447271.0000,3379263.0000,3405062.0000,3330531.0000,3386176.0000,3403399.0000,3320622.0000,3322485.0000,3467770.0000,3419819.0000" -building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,118687900,1230274.4600,1200105.6900,1276389.5100,187567.9862,134575.2841,245498.8463,"1835757.0000,1855364.0000,1913484.0000,1767408.0000,1771585.0000,1803666.0000,1855775.0000,1359524.0000,1218367.0000,1220300.0000,1216904.0000,1132363.0000,1134858.0000,1125530.0000,1195162.0000,1191044.0000,1185644.0000,1164915.0000,1129589.0000,1137263.0000,1134077.0000,1215280.0000,1189692.0000,1189963.0000,1190583.0000,1130360.0000,1162310.0000,1160006.0000,1220721.0000,1190393.0000,1130189.0000,1193259.0000,1219268.0000,1188319.0000,1189181.0000,1308798.0000,1217475.0000,1251550.0000,1129799.0000,1132334.0000,1219539.0000,1187488.0000,1134888.0000,1188800.0000,1131351.0000,1129317.0000,1224698.0000,1217775.0000,1190012.0000,1193359.0000,1128827.0000,1218396.0000,1157190.0000,1276677.0000,1191065.0000,1135759.0000,1133796.0000,1129949.0000,1223216.0000,1178331.0000,1199221.0000,1170235.0000,1143955.0000,1116203.0000,1118377.0000,1163293.0000,1226482.0000,1171648.0000,1124348.0000,1143214.0000,1168402.0000,1123307.0000,1192287.0000,1171998.0000,1199601.0000,1142943.0000,1173732.0000,1140278.0000,1119299.0000,1137604.0000,1203709.0000,1198800.0000,1194982.0000,1119489.0000,1168733.0000,1145178.0000,1219338.0000,1304350.0000,1137714.0000,1124188.0000,1190474.0000,1133606.0000,1221493.0000,1186827.0000,1162621.0000,1159204.0000,1222794.0000,1134167.0000,1141181.0000,1963679.0000" +benchmark intrusive graph dependency handling with N nodes - 1,creating 
nodes,100,5646,2258400,4.4827,4.4776,4.5025,0.0479,0.0036,0.1140,"4.4786,4.4803,4.4697,4.4803,4.4786,4.4803,4.4803,4.4805,4.4786,4.4786,4.4679,4.4786,4.4803,4.4803,4.4803,4.4803,4.4786,4.4803,4.4679,4.4803,4.4803,4.4786,4.4803,4.4786,4.4786,4.4803,4.4679,4.4803,4.4786,4.4786,4.4803,4.4803,4.4786,4.9577,4.4679,4.4786,4.4786,4.4803,4.4803,4.4786,4.4786,4.4786,4.4678,4.4803,4.4786,4.4786,4.4803,4.4786,4.4786,4.4786,4.4679,4.4786,4.4803,4.4803,4.4786,4.4786,4.4786,4.4786,4.4679,4.4786,4.4786,4.4786,4.4786,4.4803,4.4803,4.4786,4.4679,4.4803,4.4786,4.4803,4.4786,4.4803,4.4803,4.4786,4.4679,4.4803,4.4786,4.4786,4.4786,4.4786,4.4803,4.4697,4.4750,4.4803,4.4803,4.4787,4.4786,4.4786,4.4803,4.4679,4.4786,4.4786,4.4786,4.4803,4.4803,4.4803,4.4803,4.4679,4.4786,4.4786" +benchmark intrusive graph dependency handling with N nodes - 1,creating and adding dependencies,100,1102,2314200,22.3074,21.8904,23.9292,3.7947,0.6593,8.9242,"22.2722,22.4365,22.2822,22.2178,22.9728,21.9546,21.5272,21.9183,22.7913,21.5000,22.2641,21.9093,21.5272,21.5000,21.5091,22.9365,22.9637,22.9637,22.0635,21.5363,21.7995,22.9093,21.5000,22.7187,22.9637,22.2005,21.9909,59.4574,21.5454,21.5091,21.3730,22.9456,22.9728,22.9728,22.1915,21.9002,20.9002,22.4637,21.5998,22.8276,21.2087,22.6552,20.8993,22.4537,21.5998,21.7904,21.5363,22.6361,22.7278,22.4637,21.5454,21.7450,21.5281,20.8358,20.8639,22.4637,22.4909,21.7187,22.1724,22.4819,21.7096,22.9365,22.9546,21.4183,21.4637,21.5817,20.9183,21.4909,22.8367,21.3267,21.1915,21.9909,20.8367,20.7813,21.4002,21.9365,22.6724,22.8548,22.4274,22.4637,21.6279,21.8176,22.0181,21.3448,20.8367,21.5000,20.9728,21.2087,21.9819,22.9737,22.9728,20.7722,20.8367,20.8267,20.9991,22.4456,22.4546,21.0091,21.2722,22.0726" +benchmark intrusive graph dependency handling with N nodes - 1,adding and removing 
dependencies,100,1532,2298000,15.4942,15.4738,15.5753,0.1960,0.0106,0.4669,"15.4785,15.4589,15.5372,15.4785,15.4785,15.4785,15.4785,15.4523,15.4785,15.4785,15.4785,15.4785,15.4654,15.4589,15.4785,15.4785,15.4785,15.4785,15.4523,17.4406,15.4850,15.4785,15.4785,15.4785,15.4523,15.4785,15.4785,15.4719,15.4785,15.4785,15.4589,15.4785,15.4785,15.4785,15.4785,15.4523,15.4850,15.4719,15.4785,15.4719,15.4719,15.4523,15.4785,15.4785,15.4850,15.4785,15.4589,15.4785,15.4785,15.4785,15.4719,15.4785,15.4589,15.4719,15.4785,15.4785,15.4791,15.4589,15.4719,15.4785,15.4850,15.5111,15.4785,15.4589,15.4785,15.4785,15.4719,15.4785,15.4523,15.4785,15.4719,15.4719,15.4719,15.4785,15.4523,15.4785,15.4719,15.4850,15.4785,15.4517,15.4719,15.4785,15.4850,15.4850,15.4785,15.4589,15.4850,15.4785,15.4719,15.4719,15.4523,15.4785,15.4785,15.4719,15.4785,15.4785,15.4523,15.4785,15.4785,15.4785" +benchmark intrusive graph dependency handling with N nodes - 1,checking for dependencies,100,14029,1402900,1.6977,1.6963,1.7025,0.0118,0.0027,0.0274,"1.6974,1.6982,1.7017,1.6974,1.6903,1.6974,1.6981,1.6974,1.6975,1.6903,1.6974,1.6981,1.6982,1.6981,1.6981,1.6903,1.6982,1.6974,1.6974,1.6981,1.6903,1.6974,1.6974,1.6974,1.6975,1.6981,1.6903,1.6981,1.6975,1.6974,1.6974,1.6975,1.6903,1.6974,1.6974,1.6982,1.6974,1.6903,1.6981,1.6974,1.6974,1.6974,1.6974,1.6910,1.6981,1.6981,1.6974,1.6975,1.6974,1.8117,1.6974,1.6981,1.6974,1.6981,1.6903,1.6981,1.6981,1.6981,1.6975,1.6974,1.6903,1.6981,1.6982,1.6981,1.6981,1.6974,1.6903,1.6981,1.6974,1.6974,1.6981,1.6903,1.6981,1.6981,1.6982,1.6981,1.6981,1.6903,1.6982,1.6981,1.6981,1.6975,1.6946,1.6917,1.6981,1.6974,1.6974,1.6974,1.6903,1.6974,1.6981,1.6974,1.6974,1.6982,1.6903,1.6981,1.6974,1.6975,1.6974,1.6903" +benchmark intrusive graph dependency handling with N nodes - 10,creating 
nodes,100,618,2410200,41.2331,41.1761,41.4561,0.5404,0.0428,1.2845,"41.0615,41.1909,41.3058,41.1909,41.1909,41.2071,41.1748,41.2087,41.0615,41.1909,41.1764,41.1748,41.1926,41.1748,41.2071,41.2087,41.0453,41.1909,41.1926,41.2071,41.1926,41.1748,41.1909,41.1926,41.1100,41.1100,41.1926,41.1909,41.2087,41.1909,41.1909,41.1909,41.1909,41.0615,41.1909,41.1909,41.1926,41.1748,41.1909,41.1909,41.2071,41.0631,41.2071,41.1909,41.1926,41.1909,41.1926,41.1909,41.2071,41.0631,41.1909,41.1909,41.1909,41.1909,41.2087,41.2071,41.1926,41.0615,41.1748,41.1926,41.1909,41.1909,41.1926,41.1909,41.2087,41.1909,41.0615,41.2087,41.1909,41.1909,41.2071,41.1909,41.1926,41.2071,41.0615,41.2249,41.1748,41.1926,41.1909,41.1909,41.2087,41.1909,41.0615,41.2087,41.1909,46.5906,41.2233,41.1926,41.2071,41.1926,41.1909,41.0615,41.1926,41.1909,41.1909,41.1909,41.2071,41.1926,41.2071,41.1926" +benchmark intrusive graph dependency handling with N nodes - 10,creating and adding dependencies,100,100,2410000,249.6482,249.1764,251.7982,4.3429,0.3511,10.2628,"249.4500,249.2600,255.1600,249.7600,249.7500,249.9600,249.9500,249.5600,249.4500,249.7600,249.3500,249.4600,249.4500,249.5500,248.5600,249.5500,249.4600,249.4500,249.3500,249.3600,292.3300,249.1600,249.1500,249.1600,249.4500,249.3500,248.5600,249.3500,248.9500,249.1600,249.1500,249.0500,248.8500,248.5500,248.9500,249.2600,249.2500,249.2600,249.0500,249.1500,248.3600,249.0500,249.2500,248.8600,249.0500,249.2500,248.9600,248.4500,249.1500,249.1600,249.3500,249.2500,248.9500,249.0500,248.8600,248.5500,249.0500,249.5600,249.1500,248.8500,249.2600,249.0500,248.2500,248.8600,249.1500,249.1500,249.3600,248.9500,249.1500,248.7500,249.2500,249.2600,249.1500,249.0500,249.0600,249.2500,248.6500,249.2600,249.2500,249.1500,249.4500,249.6500,249.3600,248.3500,249.1500,249.1600,249.3500,249.3600,248.8500,249.1500,248.9600,249.4500,249.2500,249.0600,248.9500,248.9500,249.2600,248.6500,249.0500,249.2500" +benchmark intrusive graph dependency handling with N nodes - 
10,adding and removing dependencies,100,110,2409000,219.8958,219.5378,221.5224,3.3354,0.3897,7.9157,"220.4000,219.5818,219.3091,218.5818,219.4909,220.4909,220.7636,220.3091,219.7636,219.6727,220.8545,220.6727,220.2182,220.1273,219.3091,219.4909,252.8273,219.6727,219.4909,219.4909,219.5818,219.0364,220.1273,219.4818,219.3909,219.4000,219.3091,219.0364,219.2182,219.4000,219.3091,219.3909,219.7636,219.7636,219.5818,219.5818,219.4909,219.5818,219.4909,219.0364,219.5818,220.4909,219.9455,219.3091,219.4909,220.0364,219.5818,219.3909,219.4818,219.5818,219.5818,219.0364,219.4909,219.5818,219.3091,219.4000,219.6727,219.0273,219.4909,219.4000,219.5818,219.9455,220.5818,219.0364,219.4909,219.6727,219.5818,219.5818,219.6727,219.0273,219.3909,219.4909,219.4909,219.4000,219.4000,218.7636,219.5818,219.6727,219.6636,219.4818,219.5818,218.7636,219.2182,219.5818,219.5818,219.6727,219.6727,219.0273,219.3909,219.4000,219.4000,219.5818,219.5818,218.7636,219.5818,220.1273,219.7636,219.4909,219.4909,219.0273" +benchmark intrusive graph dependency handling with N nodes - 10,checking for dependencies,100,1030,2369000,24.4521,24.3980,24.5437,0.3520,0.2395,0.6468,"24.1117,24.1515,25.0359,24.5107,24.8029,24.5883,24.5883,24.5883,24.6660,24.6282,24.5981,24.5107,24.5883,24.5981,24.7350,24.7049,24.5883,24.6272,24.5981,24.5097,24.5883,24.5981,24.5981,24.6951,24.5883,24.5883,24.5883,24.5107,24.5883,24.7631,24.5883,24.5981,24.5883,24.5883,24.5981,24.5107,24.8029,24.5883,24.7340,24.6961,24.5883,24.5883,24.5883,24.5107,24.5883,24.5893,24.5883,24.6951,24.5883,24.6272,24.5981,24.5010,24.5883,24.7728,24.5883,24.6951,24.6660,24.6369,26.9621,24.0243,24.1117,24.1117,24.1117,24.1117,24.2291,24.1699,24.0340,24.1117,24.1117,24.3456,24.1117,24.2291,24.1117,24.0243,24.4718,24.4136,24.3544,24.1126,24.2476,24.1893,24.1117,24.0243,24.4233,24.1117,24.3155,24.1223,24.1893,24.1311,24.0340,24.4427,24.1117,24.3165,24.1117,24.1117,24.1408,24.0243,24.1117,24.1117,24.3262,24.1117" +benchmark intrusive graph dependency 
handling with N nodes - 100,creating nodes,100,56,2419200,450.8261,450.1125,453.8796,6.6068,0.8210,15.6387,"450.4643,450.4464,454.5893,451.0000,450.4643,449.5536,450.4464,450.2857,450.4643,449.3929,448.3214,450.2857,450.8214,450.2857,515.9464,450.8214,450.4643,450.2857,450.8214,448.4821,450.6429,450.2857,450.6429,451.0000,450.2857,450.4643,449.3929,449.2143,450.2857,450.6429,450.2857,449.5714,449.9286,450.2679,448.3036,450.2857,450.4643,450.2857,450.6429,450.4643,450.2857,450.6429,447.2500,450.4643,450.2679,450.6250,450.4464,450.2857,450.8214,450.1071,447.2500,449.3929,450.1071,450.4643,450.6429,450.2857,450.6429,448.3036,450.6429,450.2857,450.2857,450.6429,450.2857,450.4643,450.4643,447.6071,449.5714,450.1071,449.3750,449.5714,449.3929,450.6429,450.2857,448.5000,450.2857,450.2857,450.6429,450.1071,450.8214,450.4643,448.4821,450.2857,450.6429,450.6429,450.2857,451.1786,450.8214,450.2857,448.5000,450.1071,450.6429,450.6429,450.6429,450.4643,450.6429,450.8214,450.2857,450.4643,450.4643,450.2857" +benchmark intrusive graph dependency handling with N nodes - 100,creating and adding 
dependencies,100,6,2539200,4197.8383,4194.8083,4207.8750,25.1926,9.1716,56.1922,"4199.3333,4207.6667,4429.6667,4199.3333,4182.6667,4177.6667,4185.8333,4187.6667,4179.3333,4182.5000,4189.1667,4187.6667,4184.3333,4187.6667,4177.5000,4191.0000,4204.3333,4199.3333,4196.0000,4197.6667,4195.8333,4197.5000,4201.0000,4217.6667,4199.3333,4194.3333,4172.6667,4199.3333,4185.8333,4207.6667,4196.0000,4187.6667,4192.6667,4204.3333,4186.0000,4192.5000,4207.6667,4197.6667,4189.3333,4202.6667,4207.6667,4211.0000,4184.3333,4192.6667,4195.8333,4187.6667,4197.6667,4221.0000,4196.0000,4201.0000,4197.6667,4192.6667,4201.0000,4199.3333,4207.6667,4200.8333,4204.1667,4192.6667,4202.6667,4202.6667,4204.3333,4176.0000,4197.6667,4204.3333,4206.0000,4194.1667,4194.3333,4199.3333,4206.0000,4224.3333,4192.6667,4202.6667,4186.0000,4194.3333,4204.3333,4199.3333,4189.1667,4197.6667,4202.6667,4197.6667,4184.3333,4204.3333,4204.3333,4201.0000,4185.8333,4204.3333,4194.3333,4197.6667,4189.3333,4207.6667,4179.1667,4184.1667,4187.6667,4196.0000,4175.8333,4189.1667,4192.6667,4187.6667,4187.6667,4187.5000" +benchmark intrusive graph dependency handling with N nodes - 100,adding and removing 
dependencies,100,6,2740800,4740.5250,4730.7667,4776.4433,84.8875,22.1694,196.5559,"4722.0000,4710.3333,4782.1667,4768.6667,4735.3333,4748.6667,4738.6667,4732.0000,4743.6667,4750.3333,4743.6667,4752.0000,4675.1667,4715.3333,4755.3333,4767.1667,4676.8333,4690.1667,4748.6667,4763.6667,4732.0000,4727.0000,4710.1667,4770.3333,4725.3333,4738.6667,4758.6667,4743.6667,5553.5000,4750.3333,4747.0000,4730.3333,4710.1667,4693.5000,4732.0000,4735.3333,4747.0000,4735.3333,4752.0000,4735.3333,4732.0000,4698.6667,4696.8333,4708.5000,4765.5000,4760.3333,4738.6667,4735.3333,4713.6667,4706.8333,4732.0000,4715.3333,4703.6667,4758.6667,4713.6667,4706.8333,4742.0000,4735.3333,4740.3333,4750.3333,4730.3333,4673.5000,4727.0000,4695.1667,4726.8333,4752.1667,4742.0000,4710.1667,4710.3333,4752.0000,4737.0000,4752.0000,4693.5000,4743.6667,4685.3333,4723.5000,4742.0000,4743.6667,4715.1667,4735.3333,4742.0000,4715.1667,4718.6667,4743.6667,4708.6667,4725.3333,4723.6667,4757.0000,4757.0000,4722.0000,4750.3333,4765.5000,4768.6667,4763.6667,4760.3333,4747.0000,4700.1667,4712.0000,4733.6667,4745.3333" +benchmark intrusive graph dependency handling with N nodes - 100,checking for 
dependencies,100,13,2518100,1919.1869,1918.3823,1920.2831,4.7312,3.6157,7.1928,"1915.0000,1921.2308,1945.8462,1932.0000,1933.5385,1930.3846,1911.9231,1909.6154,1911.9231,1919.6923,1918.0769,1919.6154,1915.0000,1915.0000,1912.6923,1918.0769,1920.3846,1915.0769,1916.5385,1922.0000,1920.3846,1918.8462,1920.3846,1914.2308,1915.8462,1916.5385,1921.1538,1919.6154,1913.4615,1919.6923,1919.6154,1916.5385,1911.9231,1915.0000,1914.2308,1917.3846,1913.4615,1917.3077,1919.6154,1921.1538,1920.4615,1920.3846,1912.0000,1919.6154,1916.5385,1916.6154,1920.3846,1916.6154,1919.6154,1915.0000,1921.2308,1921.1538,1918.9231,1921.1538,1916.6154,1918.0769,1917.3077,1913.4615,1912.6923,1916.5385,1922.0000,1919.6154,1918.1538,1925.0000,1915.0769,1921.1538,1921.2308,1919.6154,1919.6154,1921.9231,1923.5385,1916.5385,1922.6923,1924.2308,1919.6154,1918.8462,1918.8462,1918.1538,1919.6154,1921.2308,1917.3077,1920.4615,1918.8462,1921.1538,1917.3846,1918.0769,1921.2308,1924.2308,1921.2308,1921.9231,1922.7692,1918.8462,1920.4615,1915.7692,1923.5385,1922.6923,1920.4615,1922.6923,1918.9231,1919.6154" +benchmark task handling > without access thread,generating and deleting 
tasks,100,1,358578400,3539792.5600,3502144.7400,3605069.2400,245227.8249,137531.6270,397675.0873,"3520245.0000,3513032.0000,3560221.0000,3526718.0000,3518402.0000,3573155.0000,3513742.0000,3513403.0000,3555492.0000,3511679.0000,3506620.0000,3519934.0000,3560311.0000,3532569.0000,3567615.0000,3848939.0000,3552576.0000,3515777.0000,3516048.0000,3511699.0000,3574128.0000,3516978.0000,3555502.0000,3194488.0000,3581492.0000,3515837.0000,3509133.0000,3560872.0000,3516398.0000,3554711.0000,3516839.0000,3510988.0000,3573065.0000,3508353.0000,3500117.0000,3503103.0000,3065071.0000,2926929.0000,2912182.0000,4983491.0000,4989282.0000,3533611.0000,3569418.0000,3516518.0000,3529813.0000,3576953.0000,3517239.0000,3516949.0000,3513864.0000,3550502.0000,3511820.0000,3512070.0000,3545072.0000,3516297.0000,3500487.0000,3563206.0000,3501188.0000,3504395.0000,3557356.0000,3521157.0000,3547087.0000,3842356.0000,3515506.0000,3504545.0000,3557897.0000,3510587.0000,3517851.0000,3554720.0000,3511299.0000,3555492.0000,3513392.0000,3505757.0000,3499234.0000,3543730.0000,3507221.0000,3571923.0000,3511648.0000,3561353.0000,3523382.0000,3525615.0000,3512301.0000,3578425.0000,3505336.0000,3520786.0000,3561634.0000,3511559.0000,3536607.0000,3521016.0000,3504115.0000,3505347.0000,3564940.0000,3531527.0000,3575921.0000,3676381.0000,3588785.0000,3319505.0000,2908114.0000,3522820.0000,3528220.0000,3560271.0000" +benchmark task handling > with access thread,generating and deleting tasks with access 
thread,100,1,838607000,8288538.3700,8215391.2100,8355026.0600,353791.7434,292243.6987,436681.5240,"8317275.0000,8366237.0000,8394299.0000,8319418.0000,8390412.0000,8353063.0000,8342281.0000,8377409.0000,8311363.0000,8342101.0000,8258692.0000,7490145.0000,7532235.0000,7537175.0000,7738156.0000,8402566.0000,8429467.0000,8355727.0000,8439426.0000,8349114.0000,8317706.0000,8361698.0000,8446820.0000,8308537.0000,8377870.0000,8841358.0000,8383319.0000,8354164.0000,8849724.0000,8862198.0000,8326612.0000,8374713.0000,8321773.0000,8391996.0000,8812764.0000,8375404.0000,8361048.0000,7545770.0000,7495937.0000,7540360.0000,8227984.0000,8349034.0000,8444055.0000,8438033.0000,8358532.0000,7532305.0000,7498330.0000,7530091.0000,7523678.0000,8211153.0000,8355686.0000,8322013.0000,8348673.0000,8382658.0000,8401323.0000,8358333.0000,8328855.0000,8376827.0000,8303548.0000,8308398.0000,8372799.0000,7690205.0000,7494503.0000,7539419.0000,7527096.0000,8067841.0000,8320771.0000,8413446.0000,8303298.0000,8376166.0000,8523756.0000,8549585.0000,8578370.0000,8404640.0000,8350768.0000,9591661.0000,8460576.0000,8541078.0000,8441571.0000,8512073.0000,8473331.0000,8521662.0000,8439205.0000,8495672.0000,8351830.0000,8420749.0000,8687216.0000,8540247.0000,8400713.0000,8350346.0000,8426451.0000,8406593.0000,8398789.0000,8336270.0000,8363001.0000,8358302.0000,8349726.0000,8360475.0000,8394241.0000,8423555.0000" +generating large task graphs,soup 
topology,100,1,91454000,937794.7400,935559.4200,941232.2000,13935.3230,10030.9526,18178.8798,"975528.0000,933048.0000,947836.0000,936073.0000,931796.0000,933308.0000,981290.0000,933268.0000,932818.0000,932296.0000,930994.0000,934782.0000,933138.0000,932777.0000,931866.0000,936274.0000,932948.0000,933769.0000,933168.0000,974647.0000,934621.0000,931404.0000,931315.0000,932848.0000,941434.0000,934130.0000,932356.0000,930724.0000,936575.0000,932977.0000,932757.0000,933068.0000,992020.0000,934119.0000,933077.0000,932046.0000,940131.0000,933508.0000,932948.0000,934501.0000,933168.0000,938719.0000,931705.0000,931545.0000,933820.0000,985107.0000,930713.0000,931605.0000,934431.0000,938568.0000,933879.0000,932146.0000,933228.0000,970288.0000,936334.0000,934581.0000,935142.0000,932186.0000,938909.0000,932848.0000,932927.0000,933910.0000,973274.0000,934130.0000,931766.0000,930834.0000,937787.0000,931915.0000,932927.0000,931696.0000,936144.0000,932596.0000,932927.0000,933849.0000,931726.0000,990296.0000,931014.0000,933319.0000,932497.0000,935342.0000,931405.0000,932647.0000,931275.0000,933318.0000,932187.0000,931936.0000,930072.0000,932056.0000,984916.0000,931765.0000,932907.0000,931345.0000,938077.0000,933278.0000,931905.0000,931776.0000,935182.0000,931345.0000,931945.0000,932156.0000" +generating large task graphs,chain 
topology,100,1,3579100,32179.3000,32084.6100,32418.6700,682.0472,97.4356,1237.4138,"32009.0000,32109.0000,36728.0000,32460.0000,32239.0000,32179.0000,32169.0000,32039.0000,31899.0000,32189.0000,32009.0000,32098.0000,31998.0000,32129.0000,32019.0000,32069.0000,31949.0000,32089.0000,31958.0000,32048.0000,32009.0000,32049.0000,32009.0000,31919.0000,32068.0000,32098.0000,32078.0000,32069.0000,32099.0000,32049.0000,31979.0000,32079.0000,31959.0000,32099.0000,31968.0000,32149.0000,32009.0000,32149.0000,32139.0000,32019.0000,32059.0000,32139.0000,31989.0000,32089.0000,32009.0000,32159.0000,32029.0000,32079.0000,32099.0000,32289.0000,32029.0000,32099.0000,32219.0000,32229.0000,32109.0000,32219.0000,32119.0000,32340.0000,32129.0000,32189.0000,32039.0000,32129.0000,32009.0000,32249.0000,31999.0000,32219.0000,31949.0000,32209.0000,32049.0000,32169.0000,32089.0000,32179.0000,32059.0000,32089.0000,32159.0000,32129.0000,32079.0000,32139.0000,31959.0000,37078.0000,32089.0000,32189.0000,32039.0000,32099.0000,31929.0000,32159.0000,31939.0000,32139.0000,31898.0000,32079.0000,32009.0000,32099.0000,31919.0000,32179.0000,32029.0000,32029.0000,31938.0000,32079.0000,32019.0000,32089.0000" +generating large task graphs,expanding tree 
topology,100,1,5830200,57955.0700,57849.6400,58187.3600,757.2053,419.2312,1423.7742,"58048.0000,57948.0000,60913.0000,58399.0000,58339.0000,57788.0000,57346.0000,57757.0000,57337.0000,57828.0000,57828.0000,57647.0000,57487.0000,57588.0000,57667.0000,57487.0000,57837.0000,57698.0000,57237.0000,57216.0000,58068.0000,57567.0000,58068.0000,58299.0000,57918.0000,57727.0000,57637.0000,57888.0000,58238.0000,57978.0000,58078.0000,58038.0000,57848.0000,58279.0000,57928.0000,57777.0000,57698.0000,57818.0000,57878.0000,57677.0000,57938.0000,57938.0000,57808.0000,58048.0000,58179.0000,57647.0000,57818.0000,57948.0000,58359.0000,57737.0000,58109.0000,58258.0000,58069.0000,58338.0000,57598.0000,58088.0000,63649.0000,58329.0000,60342.0000,57377.0000,57858.0000,57788.0000,57527.0000,57387.0000,57417.0000,57708.0000,57758.0000,57457.0000,57747.0000,57538.0000,57878.0000,57467.0000,57657.0000,57698.0000,58078.0000,59321.0000,58098.0000,57918.0000,57828.0000,57407.0000,58219.0000,57697.0000,57738.0000,57717.0000,58008.0000,57797.0000,57737.0000,57768.0000,57898.0000,58048.0000,57637.0000,57988.0000,57747.0000,57678.0000,58118.0000,57598.0000,57607.0000,57637.0000,57647.0000,58309.0000" +generating large task graphs,contracting tree 
topology,100,1,8351500,83252.5300,83095.9200,83628.9300,1163.7707,579.9365,2319.8279,"83126.0000,82965.0000,92614.0000,84629.0000,84117.0000,83686.0000,83437.0000,83065.0000,83356.0000,83446.0000,82976.0000,82584.0000,83596.0000,82936.0000,83155.0000,82775.0000,82915.0000,82945.0000,83055.0000,82515.0000,83106.0000,83246.0000,82294.0000,82454.0000,82183.0000,82815.0000,82815.0000,82615.0000,83126.0000,82695.0000,82805.0000,82875.0000,82605.0000,83156.0000,83386.0000,82765.0000,82755.0000,83507.0000,87885.0000,83356.0000,83206.0000,83246.0000,83426.0000,83306.0000,83336.0000,83346.0000,82655.0000,83386.0000,83016.0000,82885.0000,84228.0000,83276.0000,83186.0000,83076.0000,82875.0000,83286.0000,83046.0000,83566.0000,83336.0000,83035.0000,82735.0000,83286.0000,83376.0000,83116.0000,83186.0000,83266.0000,82935.0000,83036.0000,82935.0000,83016.0000,82825.0000,83176.0000,82645.0000,82775.0000,83016.0000,82504.0000,83086.0000,83306.0000,82855.0000,82624.0000,82725.0000,82896.0000,83196.0000,83426.0000,83066.0000,83596.0000,86341.0000,83387.0000,82815.0000,83146.0000,82785.0000,83366.0000,83156.0000,82955.0000,83096.0000,83256.0000,82685.0000,83126.0000,82404.0000,83076.0000" +generating large task graphs,wave_sim 
topology,100,1,33266400,342686.2800,340658.6000,346012.3800,13026.0669,8446.0187,18672.9676,"303494.0000,402501.0000,364710.0000,346406.0000,341886.0000,339663.0000,339822.0000,339262.0000,339903.0000,338821.0000,339592.0000,339001.0000,405788.0000,340714.0000,339281.0000,339412.0000,340043.0000,339322.0000,340033.0000,339893.0000,338971.0000,339793.0000,339362.0000,339812.0000,345935.0000,340123.0000,339251.0000,339341.0000,340143.0000,340273.0000,339903.0000,340033.0000,339351.0000,339622.0000,339502.0000,339562.0000,344291.0000,340765.0000,339732.0000,339893.0000,339673.0000,339862.0000,340074.0000,339331.0000,339653.0000,340564.0000,339863.0000,396430.0000,342257.0000,339923.0000,339913.0000,341446.0000,340614.0000,339793.0000,340203.0000,339302.0000,341436.0000,339602.0000,339252.0000,343459.0000,340224.0000,340273.0000,340434.0000,340233.0000,339382.0000,340023.0000,339783.0000,339973.0000,339662.0000,338911.0000,339201.0000,382825.0000,340163.0000,339242.0000,340073.0000,339512.0000,339011.0000,339663.0000,339341.0000,338811.0000,339943.0000,339632.0000,388045.0000,340985.0000,339151.0000,339853.0000,338981.0000,339432.0000,338700.0000,339692.0000,339031.0000,339422.0000,338921.0000,339282.0000,345954.0000,338891.0000,338961.0000,340073.0000,338890.0000,340194.0000" +generating large task graphs,jacobi 
topology,100,1,10885300,106786.9000,106200.7000,109251.5000,5293.2443,926.4350,12418.1835,"105979.0000,106250.0000,113263.0000,107241.0000,106601.0000,106620.0000,106670.0000,106851.0000,106921.0000,111369.0000,106129.0000,106099.0000,106190.0000,106189.0000,106260.0000,106270.0000,106039.0000,106099.0000,106110.0000,106249.0000,106000.0000,106389.0000,106369.0000,106661.0000,106079.0000,106130.0000,106399.0000,106219.0000,106320.0000,106470.0000,105989.0000,105919.0000,105899.0000,105639.0000,105598.0000,105538.0000,105378.0000,106030.0000,106049.0000,105478.0000,106169.0000,105949.0000,105808.0000,105869.0000,105849.0000,106180.0000,158358.0000,106370.0000,106460.0000,106130.0000,105909.0000,105808.0000,106009.0000,105869.0000,106089.0000,106059.0000,105839.0000,106080.0000,105798.0000,106220.0000,105999.0000,105909.0000,106410.0000,106420.0000,105919.0000,105989.0000,106210.0000,106089.0000,106009.0000,105798.0000,106420.0000,106280.0000,105699.0000,105728.0000,106100.0000,105959.0000,105789.0000,105989.0000,105869.0000,105959.0000,106220.0000,105648.0000,105949.0000,111620.0000,105859.0000,106069.0000,106200.0000,106069.0000,105989.0000,106219.0000,106109.0000,105959.0000,106099.0000,106049.0000,105909.0000,105849.0000,105789.0000,106049.0000,105829.0000,105889.0000" +generating large command graphs for N nodes - 1,soup 
topology,100,1,165789300,1689353.6800,1669680.1000,1724973.8700,132245.7179,82588.7873,189943.2459,"1791556.0000,2334306.0000,1658574.0000,2301153.0000,2218065.0000,1656400.0000,1641522.0000,1645769.0000,1650598.0000,1643465.0000,1700885.0000,1737253.0000,1647644.0000,1649346.0000,1648756.0000,1649466.0000,1645188.0000,1689262.0000,1648555.0000,1650549.0000,1653335.0000,1645729.0000,1706204.0000,1640830.0000,1650018.0000,1649487.0000,1647203.0000,1654486.0000,1648305.0000,1699251.0000,1648574.0000,1648665.0000,1647904.0000,1635680.0000,1684142.0000,1645690.0000,1640390.0000,1647684.0000,1632875.0000,1688691.0000,1638866.0000,1635690.0000,1645248.0000,1643365.0000,1648195.0000,1675867.0000,2241319.0000,1649938.0000,1655047.0000,1648215.0000,1649577.0000,1658634.0000,1649577.0000,1684222.0000,1648695.0000,1649848.0000,1653143.0000,1642954.0000,1708799.0000,1648535.0000,1649737.0000,1648936.0000,1644628.0000,1716043.0000,1645900.0000,1656640.0000,1647904.0000,1649918.0000,1654817.0000,1643406.0000,1705283.0000,1649547.0000,1648675.0000,1652181.0000,1651240.0000,1655989.0000,1648535.0000,1694312.0000,1650389.0000,1647743.0000,1652482.0000,1644056.0000,1655549.0000,1643195.0000,1650579.0000,1700213.0000,1651281.0000,1657993.0000,1650689.0000,1714881.0000,1742734.0000,2164764.0000,1647473.0000,1643626.0000,1658815.0000,1646541.0000,1693761.0000,1649707.0000,1649096.0000,1656921.0000" +generating large command graphs for N nodes - 1,chain 
topology,100,1,11929900,109150.8900,108816.9100,109830.9200,2329.1836,1336.3010,3844.0147,"108664.0000,109035.0000,118684.0000,109455.0000,109296.0000,108544.0000,108554.0000,108845.0000,108253.0000,108845.0000,108794.0000,108705.0000,108263.0000,108354.0000,108434.0000,108524.0000,108504.0000,108373.0000,119936.0000,109296.0000,109024.0000,108664.0000,108924.0000,108774.0000,108594.0000,108784.0000,108463.0000,108394.0000,108684.0000,108274.0000,108353.0000,108333.0000,108644.0000,108474.0000,108304.0000,108614.0000,108534.0000,108634.0000,108484.0000,108584.0000,108434.0000,108664.0000,109075.0000,108775.0000,108604.0000,108454.0000,108604.0000,108784.0000,108875.0000,108444.0000,108584.0000,108524.0000,108293.0000,108885.0000,108744.0000,115628.0000,109105.0000,108684.0000,108704.0000,108994.0000,108684.0000,108804.0000,108524.0000,108504.0000,108644.0000,108564.0000,108473.0000,108463.0000,108164.0000,108534.0000,108694.0000,108614.0000,108484.0000,108534.0000,108504.0000,108564.0000,108424.0000,108955.0000,108504.0000,108203.0000,108985.0000,108685.0000,108694.0000,108915.0000,108454.0000,108824.0000,108715.0000,108654.0000,108524.0000,108594.0000,108674.0000,108764.0000,124444.0000,114616.0000,109105.0000,108925.0000,108594.0000,108784.0000,109115.0000,108433.0000" +generating large command graphs for N nodes - 1,expanding tree 
topology,100,1,17962800,183438.9600,182390.0500,186114.8100,7827.3559,2130.1254,14680.3151,"183016.0000,182003.0000,198365.0000,183596.0000,182595.0000,182764.0000,182474.0000,182063.0000,182955.0000,182113.0000,182575.0000,181903.0000,182314.0000,181182.0000,182003.0000,182645.0000,183145.0000,182695.0000,182234.0000,183115.0000,182213.0000,183737.0000,188856.0000,183136.0000,181883.0000,181933.0000,182324.0000,182163.0000,182094.0000,183376.0000,182234.0000,182504.0000,181903.0000,182925.0000,182354.0000,183175.0000,183175.0000,181773.0000,182555.0000,181903.0000,182264.0000,182664.0000,182484.0000,181462.0000,243520.0000,182604.0000,182354.0000,182905.0000,183066.0000,182324.0000,181893.0000,181953.0000,181021.0000,181442.0000,182173.0000,182094.0000,180901.0000,181332.0000,181793.0000,180821.0000,180640.0000,180149.0000,180009.0000,179679.0000,181412.0000,188605.0000,181432.0000,181883.0000,181162.0000,180551.0000,181342.0000,181142.0000,181873.0000,180510.0000,181582.0000,181432.0000,181462.0000,180991.0000,182815.0000,181753.0000,182575.0000,181903.0000,182635.0000,182214.0000,182444.0000,181442.0000,182414.0000,228140.0000,183966.0000,183426.0000,181963.0000,181402.0000,181733.0000,182615.0000,181713.0000,181793.0000,182123.0000,181653.0000,181893.0000,182384.0000" +generating large command graphs for N nodes - 1,contracting tree 
topology,100,1,21769500,232890.1500,227210.3100,240352.3300,32931.3305,27089.0103,40134.1412,"216168.0000,217251.0000,225436.0000,218553.0000,218142.0000,217801.0000,216008.0000,221158.0000,216909.0000,218282.0000,216148.0000,216649.0000,215887.0000,216508.0000,217772.0000,217100.0000,217761.0000,216869.0000,219735.0000,217060.0000,217411.0000,216529.0000,217270.0000,216399.0000,217390.0000,269169.0000,218522.0000,217731.0000,216759.0000,217821.0000,217039.0000,216990.0000,216780.0000,217170.0000,215978.0000,217420.0000,217802.0000,217059.0000,217049.0000,216889.0000,217551.0000,217471.0000,218112.0000,216959.0000,222210.0000,217150.0000,218523.0000,217451.0000,216919.0000,217070.0000,243190.0000,302151.0000,302943.0000,301630.0000,301530.0000,302171.0000,300338.0000,299406.0000,300688.0000,358148.0000,302151.0000,300518.0000,299907.0000,299787.0000,300148.0000,300548.0000,302853.0000,279869.0000,216959.0000,215136.0000,216499.0000,217601.0000,215406.0000,216760.0000,224934.0000,217310.0000,216920.0000,217250.0000,218002.0000,218432.0000,217631.0000,215517.0000,217331.0000,217090.0000,217530.0000,218051.0000,216629.0000,217180.0000,216609.0000,218633.0000,216739.0000,218172.0000,222400.0000,216679.0000,216509.0000,216479.0000,217942.0000,217741.0000,216629.0000,216549.0000" +generating large command graphs for N nodes - 1,wave_sim 
topology,100,1,103608100,1032403.6000,1016687.5700,1051103.4700,87112.8342,71113.9180,124798.4865,"927457.0000,923599.0000,1106106.0000,1129270.0000,1070619.0000,1067694.0000,1059788.0000,1065950.0000,1057784.0000,1468834.0000,1315323.0000,1066200.0000,1064648.0000,1066491.0000,1072893.0000,1063295.0000,1063816.0000,1066922.0000,1070799.0000,1064096.0000,1063184.0000,1133628.0000,1068004.0000,1062493.0000,1061982.0000,1072953.0000,1063515.0000,1062974.0000,1067022.0000,1120383.0000,1062444.0000,1061912.0000,1071571.0000,1060921.0000,1062343.0000,1062774.0000,1130422.0000,1070598.0000,1063345.0000,1062063.0000,1108661.0000,1060910.0000,1061963.0000,1064748.0000,1069987.0000,1068885.0000,1066551.0000,1072181.0000,1070038.0000,1063666.0000,1068364.0000,1132546.0000,1064226.0000,1065269.0000,1063756.0000,1068605.0000,1065329.0000,1065960.0000,1069056.0000,1073894.0000,1064557.0000,1063415.0000,1122066.0000,1066030.0000,1060760.0000,1064186.0000,1066551.0000,1062744.0000,1064507.0000,1067173.0000,973164.0000,921686.0000,922258.0000,921446.0000,932888.0000,924060.0000,922929.0000,928189.0000,928308.0000,923359.0000,922027.0000,924762.0000,922748.0000,932517.0000,922418.0000,922378.0000,924582.0000,940141.0000,923931.0000,924431.0000,923329.0000,930002.0000,924541.0000,924972.0000,923760.0000,925363.0000,940201.0000,921936.0000,921946.0000,923389.0000" +generating large command graphs for N nodes - 1,jacobi 
topology,100,1,33971900,348828.9000,347410.6500,351974.4500,10360.3645,5610.7607,16964.5858,"346726.0000,352997.0000,360922.0000,347827.0000,348428.0000,347276.0000,346204.0000,346725.0000,346426.0000,351174.0000,347006.0000,346816.0000,346305.0000,346114.0000,346926.0000,346625.0000,346856.0000,346485.0000,346215.0000,345684.0000,345413.0000,350623.0000,346224.0000,346004.0000,344952.0000,345543.0000,347016.0000,346835.0000,345854.0000,346345.0000,346836.0000,345764.0000,399416.0000,348078.0000,347478.0000,347076.0000,347928.0000,347367.0000,346825.0000,346906.0000,345503.0000,346385.0000,346445.0000,347688.0000,352086.0000,347287.0000,346225.0000,347347.0000,347487.0000,348520.0000,348128.0000,347307.0000,346395.0000,345583.0000,346265.0000,413623.0000,346886.0000,346105.0000,346084.0000,345343.0000,345042.0000,346044.0000,346295.0000,345734.0000,345684.0000,345914.0000,346315.0000,355202.0000,347197.0000,346035.0000,345423.0000,346846.0000,347518.0000,345693.0000,347337.0000,347066.0000,346145.0000,346315.0000,351695.0000,346525.0000,344692.0000,344892.0000,347116.0000,345423.0000,345453.0000,345995.0000,345974.0000,346445.0000,346515.0000,346856.0000,405568.0000,347687.0000,347467.0000,346405.0000,346726.0000,347377.0000,347216.0000,347186.0000,346926.0000,346034.0000" +generating large command graphs for N nodes - 4,soup 
topology,100,1,202912700,1981342.1400,1946939.3300,2028360.6100,202934.4825,150021.4715,338411.1709,"1705192.0000,1709571.0000,1784483.0000,2048163.0000,2050317.0000,2047281.0000,2094411.0000,2038314.0000,2042983.0000,2039577.0000,2047372.0000,2040188.0000,2085254.0000,2038053.0000,2045669.0000,2038955.0000,2040999.0000,2040819.0000,2100221.0000,2036200.0000,2047853.0000,2031792.0000,2045578.0000,2036721.0000,2089491.0000,2041429.0000,2043534.0000,2037483.0000,2106935.0000,2040067.0000,2047020.0000,2033706.0000,2040258.0000,2042793.0000,2107596.0000,2039467.0000,2052140.0000,2038986.0000,2093609.0000,2042202.0000,2040659.0000,2044627.0000,2037392.0000,2062180.0000,1709891.0000,1704081.0000,1714460.0000,1701716.0000,1812717.0000,3238200.0000,2034467.0000,1710393.0000,1718107.0000,1708218.0000,1713729.0000,1705894.0000,1725240.0000,1709331.0000,1705543.0000,1718778.0000,1706655.0000,1933225.0000,2045297.0000,2085163.0000,2037813.0000,2050858.0000,2040127.0000,2106172.0000,2047751.0000,2051679.0000,2036991.0000,2042883.0000,1902386.0000,1719679.0000,1705492.0000,1712447.0000,1710422.0000,1736071.0000,1710733.0000,1706525.0000,2007626.0000,2039095.0000,2050287.0000,2048243.0000,2086185.0000,2046510.0000,2082978.0000,2599378.0000,2045869.0000,2100031.0000,2047822.0000,2055327.0000,2041340.0000,2046209.0000,2035950.0000,2088881.0000,2042642.0000,2047792.0000,2051709.0000,1833666.0000" +generating large command graphs for N nodes - 4,chain 
topology,100,1,37171600,374809.6100,372676.8100,378913.1400,14633.9012,8923.7195,23229.1514,"371213.0000,370801.0000,387012.0000,427830.0000,372605.0000,372705.0000,372265.0000,371843.0000,372775.0000,371372.0000,370982.0000,371503.0000,371423.0000,378275.0000,373857.0000,372775.0000,372435.0000,371332.0000,371423.0000,372645.0000,371132.0000,371233.0000,372254.0000,371884.0000,439121.0000,372785.0000,372956.0000,372635.0000,370641.0000,370521.0000,371493.0000,372224.0000,372595.0000,370330.0000,371192.0000,381221.0000,370922.0000,370981.0000,370531.0000,370000.0000,371543.0000,372214.0000,370110.0000,371743.0000,370791.0000,370261.0000,376272.0000,369539.0000,370581.0000,370661.0000,371693.0000,371092.0000,370521.0000,369910.0000,369970.0000,370130.0000,429693.0000,370782.0000,369298.0000,369179.0000,371412.0000,370161.0000,370130.0000,371793.0000,370701.0000,370651.0000,371513.0000,466493.0000,373406.0000,370471.0000,370321.0000,369589.0000,370331.0000,371342.0000,371492.0000,369980.0000,370321.0000,413673.0000,373046.0000,371763.0000,370902.0000,370130.0000,370551.0000,369700.0000,370040.0000,369028.0000,369108.0000,368917.0000,376532.0000,370401.0000,371332.0000,371493.0000,371793.0000,371473.0000,371803.0000,371051.0000,370039.0000,370360.0000,370731.0000,377283.0000" +generating large command graphs for N nodes - 4,expanding tree 
topology,100,1,44834700,454699.0800,449771.3500,463501.9300,32652.3672,20993.7821,47946.2894,"445413.0000,445383.0000,458919.0000,446515.0000,445964.0000,446485.0000,444010.0000,441856.0000,448128.0000,442527.0000,440694.0000,457075.0000,613702.0000,614894.0000,612089.0000,522259.0000,444672.0000,444361.0000,444511.0000,441065.0000,442808.0000,443590.0000,442728.0000,443239.0000,507280.0000,444331.0000,444241.0000,443179.0000,444401.0000,445413.0000,444892.0000,445273.0000,445012.0000,451013.0000,444872.0000,547817.0000,446455.0000,446255.0000,444722.0000,446845.0000,445523.0000,444531.0000,493123.0000,446124.0000,445914.0000,446315.0000,446665.0000,441406.0000,443820.0000,442958.0000,446004.0000,450783.0000,444481.0000,446175.0000,446074.0000,443069.0000,443559.0000,443089.0000,445223.0000,443479.0000,452497.0000,445994.0000,444551.0000,445884.0000,445653.0000,445253.0000,444381.0000,443419.0000,444632.0000,512310.0000,445202.0000,447777.0000,446205.0000,446645.0000,445794.0000,446585.0000,444602.0000,444411.0000,452096.0000,444992.0000,443630.0000,441876.0000,445733.0000,443599.0000,444321.0000,444491.0000,446165.0000,497131.0000,445914.0000,444191.0000,445663.0000,446736.0000,445293.0000,444611.0000,444031.0000,451755.0000,451645.0000,447777.0000,447366.0000,445894.0000" +generating large command graphs for N nodes - 4,contracting tree 
topology,100,1,47942800,449588.5800,443855.9100,455948.0700,30827.4175,28103.4776,35540.9619,"543699.0000,486300.0000,506508.0000,489407.0000,489797.0000,486591.0000,486611.0000,487774.0000,487442.0000,485468.0000,482813.0000,428060.0000,425755.0000,426016.0000,425605.0000,425926.0000,425726.0000,426778.0000,425826.0000,433370.0000,426277.0000,424994.0000,425235.0000,426106.0000,426157.0000,425906.0000,425034.0000,424403.0000,432147.0000,428130.0000,425455.0000,425355.0000,428000.0000,426176.0000,425235.0000,426788.0000,425535.0000,426737.0000,444942.0000,426857.0000,425044.0000,425335.0000,427629.0000,425846.0000,424413.0000,425354.0000,424833.0000,434111.0000,427880.0000,424613.0000,427078.0000,425966.0000,427028.0000,428060.0000,425906.0000,426447.0000,433801.0000,426126.0000,426948.0000,426897.0000,426106.0000,427449.0000,426507.0000,428070.0000,427018.0000,428501.0000,442948.0000,429343.0000,427489.0000,426136.0000,426036.0000,426427.0000,427068.0000,426808.0000,425856.0000,459700.0000,485429.0000,485899.0000,485038.0000,485709.0000,485790.0000,484537.0000,483866.0000,530434.0000,486962.0000,486942.0000,486611.0000,487423.0000,485529.0000,486732.0000,487322.0000,494306.0000,487032.0000,487062.0000,484778.0000,486180.0000,486320.0000,485690.0000,486140.0000,485409.0000" +generating large command graphs for N nodes - 4,wave_sim 
topology,100,1,219785800,2138371.3400,2109253.1500,2163925.7100,138912.7184,124083.1475,150107.0159,"2262489.0000,2213376.0000,1961738.0000,1922114.0000,1921563.0000,1909871.0000,1912956.0000,1927593.0000,1908157.0000,1914199.0000,1907616.0000,1915641.0000,1911083.0000,1921813.0000,1912996.0000,2099380.0000,2207786.0000,2239215.0000,2202286.0000,2197335.0000,2262409.0000,2204570.0000,2207555.0000,2205422.0000,2207635.0000,2209699.0000,2256378.0000,2206614.0000,2204840.0000,2271797.0000,2218727.0000,2213597.0000,2213396.0000,2272808.0000,2213526.0000,2261458.0000,2220039.0000,2208147.0000,2262008.0000,2251929.0000,2268380.0000,2262751.0000,2317413.0000,2255817.0000,2210130.0000,2280213.0000,2205902.0000,2213396.0000,2208056.0000,2214438.0000,2205712.0000,2263562.0000,2205491.0000,2218355.0000,2216642.0000,2214648.0000,2245918.0000,2211864.0000,2207284.0000,2211262.0000,2275243.0000,2218216.0000,2219639.0000,2247801.0000,2207805.0000,2208617.0000,2214198.0000,1949656.0000,1911704.0000,1916363.0000,1909811.0000,1919579.0000,1912245.0000,1926482.0000,1920771.0000,1923296.0000,1912816.0000,1930440.0000,1911775.0000,1914028.0000,1924588.0000,1921392.0000,2188790.0000,2212064.0000,2208157.0000,2255647.0000,2198618.0000,2207916.0000,2197256.0000,2208848.0000,2207635.0000,2255696.0000,2210220.0000,2198529.0000,2264192.0000,2197345.0000,2224107.0000,2207796.0000,2210311.0000,2208547.0000" +generating large command graphs for N nodes - 4,jacobi 
topology,100,1,81372400,819192.6600,812775.2700,834825.9300,47262.4898,14489.2273,84104.4829,"807430.0000,805756.0000,818480.0000,809133.0000,807109.0000,810625.0000,814964.0000,810816.0000,812228.0000,809874.0000,809002.0000,817208.0000,811878.0000,811667.0000,811979.0000,810365.0000,876441.0000,808582.0000,808622.0000,806388.0000,811597.0000,815945.0000,809884.0000,810134.0000,809844.0000,811477.0000,861883.0000,809454.0000,810305.0000,810245.0000,808181.0000,813722.0000,808762.0000,807099.0000,811187.0000,809754.0000,815304.0000,812469.0000,812549.0000,809333.0000,810234.0000,853286.0000,810997.0000,809654.0000,809814.0000,808131.0000,820495.0000,808371.0000,808602.0000,808271.0000,807971.0000,816427.0000,811898.0000,808532.0000,807700.0000,852756.0000,810556.0000,809393.0000,811828.0000,809093.0000,812459.0000,804284.0000,806968.0000,805435.0000,802129.0000,810686.0000,806468.0000,805586.0000,804895.0000,805035.0000,854388.0000,804944.0000,805235.0000,801098.0000,802901.0000,811087.0000,803061.0000,802901.0000,803142.0000,804494.0000,866532.0000,803382.0000,805947.0000,802200.0000,804033.0000,809353.0000,803171.0000,804414.0000,1116506.0000,1153175.0000,810285.0000,808962.0000,807270.0000,809674.0000,818190.0000,809494.0000,811618.0000,806578.0000,807409.0000,862193.0000" +generating large command graphs for N nodes - 16,soup 
topology,100,1,262492700,2657276.5100,2623299.0800,2688066.0700,164869.7804,126427.4208,209612.6921,"2241240.0000,2671184.0000,2680272.0000,2733984.0000,2669812.0000,2661126.0000,2670854.0000,2842330.0000,2674561.0000,2669842.0000,2706402.0000,2661446.0000,2666346.0000,2666736.0000,2726729.0000,2670193.0000,2664942.0000,2717222.0000,2671035.0000,2677226.0000,2673088.0000,2655755.0000,2692455.0000,2661656.0000,2654893.0000,2659783.0000,2700219.0000,3192023.0000,2971114.0000,3067506.0000,2677898.0000,2669602.0000,2671636.0000,2707693.0000,2661666.0000,2677807.0000,2723955.0000,2669141.0000,2672567.0000,2677938.0000,2665865.0000,2725748.0000,2671135.0000,2668880.0000,2672717.0000,2722672.0000,2659393.0000,2661226.0000,2695190.0000,2650334.0000,2655494.0000,2713314.0000,2669261.0000,2723634.0000,3214505.0000,2668711.0000,2711380.0000,2675192.0000,2672447.0000,2718645.0000,2672858.0000,2663730.0000,2676324.0000,2705991.0000,2662448.0000,2671666.0000,2720709.0000,2666095.0000,2673107.0000,2676324.0000,2669311.0000,2709868.0000,2671084.0000,2663389.0000,2663240.0000,2711942.0000,2674010.0000,2666927.0000,2724186.0000,2665424.0000,2659582.0000,2667748.0000,2711461.0000,2664061.0000,2669381.0000,2671755.0000,2664141.0000,2734054.0000,2671956.0000,2659373.0000,2668379.0000,2654343.0000,2205081.0000,2223625.0000,2208497.0000,2226481.0000,2204930.0000,2218015.0000,2208317.0000,2230218.0000" +generating large command graphs for N nodes - 16,chain 
topology,100,1,123326400,1205512.7700,1182469.7600,1236433.6800,135152.5910,104856.2524,176416.0618,"1221294.0000,1276960.0000,1098963.0000,1068114.0000,1068094.0000,1068565.0000,1209061.0000,1221775.0000,1220974.0000,1280396.0000,1220763.0000,1222107.0000,1228468.0000,1225282.0000,1221044.0000,1229060.0000,1220292.0000,1221775.0000,1219752.0000,1274265.0000,1219321.0000,1218058.0000,1226955.0000,1219441.0000,1218911.0000,1287300.0000,1217557.0000,1220443.0000,1228197.0000,1220883.0000,1220022.0000,1223228.0000,1279835.0000,1231063.0000,1695865.0000,1697848.0000,1436252.0000,1229049.0000,1221786.0000,1220794.0000,1272161.0000,1223729.0000,1220513.0000,1222617.0000,1226956.0000,1223268.0000,1219100.0000,1287751.0000,1180277.0000,1067914.0000,1075929.0000,1068134.0000,1068165.0000,1067362.0000,1073564.0000,1067463.0000,1066611.0000,1066461.0000,1083213.0000,1068354.0000,1067153.0000,1068014.0000,1073554.0000,1068525.0000,1065108.0000,1071070.0000,1066932.0000,1068104.0000,1066511.0000,1083063.0000,1068605.0000,1067814.0000,1067693.0000,1077873.0000,1069486.0000,1069656.0000,1068264.0000,1270378.0000,1223899.0000,1225262.0000,1230803.0000,1227276.0000,1223439.0000,1236663.0000,1219531.0000,1220352.0000,1268053.0000,1218459.0000,1303661.0000,1705943.0000,1257443.0000,1216124.0000,1278693.0000,1219010.0000,1217096.0000,1218981.0000,1222978.0000,1555899.0000,1700734.0000,1303821.0000" +generating large command graphs for N nodes - 16,expanding tree 
topology,100,1,109571500,1098224.6400,1090946.5600,1112760.3200,50667.8705,24592.2567,101388.6472,"1096938.0000,1092781.0000,1509341.0000,1135242.0000,1119301.0000,1111967.0000,1104543.0000,1102109.0000,1100886.0000,1103582.0000,1092630.0000,1095385.0000,1103030.0000,1096829.0000,1094354.0000,1094314.0000,1105795.0000,1095125.0000,1095065.0000,1094774.0000,1111296.0000,1096648.0000,1096207.0000,1099394.0000,1099153.0000,1098221.0000,1109061.0000,1124641.0000,1110214.0000,1100716.0000,1106888.0000,1101237.0000,1102670.0000,1101437.0000,1103511.0000,1095196.0000,1093672.0000,1099033.0000,1105465.0000,1096027.0000,1093392.0000,1104984.0000,1094924.0000,1096618.0000,1096557.0000,1102700.0000,1094725.0000,1098121.0000,1100345.0000,1103061.0000,1095887.0000,1095486.0000,1102589.0000,1092710.0000,1092090.0000,1095676.0000,1102640.0000,1099233.0000,1095977.0000,1094214.0000,1103641.0000,1095647.0000,1094424.0000,1102469.0000,1095286.0000,1092981.0000,1096067.0000,1134731.0000,941394.0000,936053.0000,936375.0000,1029751.0000,1098021.0000,1093442.0000,1093261.0000,1098211.0000,1094073.0000,1097190.0000,1104353.0000,1097449.0000,1093302.0000,1098381.0000,1101297.0000,1092170.0000,1103301.0000,1094954.0000,1099814.0000,1095526.0000,1100506.0000,1104423.0000,1094985.0000,1093863.0000,1094153.0000,1106908.0000,1095085.0000,1092931.0000,1099794.0000,1095085.0000,1091499.0000,1093061.0000" +generating large command graphs for N nodes - 16,contracting tree 
topology,100,1,113024000,1187542.9900,1178424.8200,1192268.5300,32237.3057,17902.7351,49380.0025,"1016285.0000,1024441.0000,1220033.0000,1194524.0000,1197780.0000,1191077.0000,1191738.0000,1199313.0000,1185818.0000,1193732.0000,1197139.0000,1193372.0000,1191979.0000,1194594.0000,1198541.0000,1193251.0000,1191138.0000,1200906.0000,1188031.0000,1190937.0000,1197880.0000,1191017.0000,1191197.0000,1194483.0000,1197339.0000,1191007.0000,1188122.0000,1196848.0000,1188102.0000,1191208.0000,1197379.0000,1193271.0000,1189253.0000,1188913.0000,1199704.0000,1188673.0000,1190697.0000,1194083.0000,1189304.0000,1191017.0000,1186869.0000,1197650.0000,1190296.0000,1188673.0000,1195325.0000,1189945.0000,1191077.0000,1191769.0000,1189093.0000,1193301.0000,1191077.0000,1195987.0000,1193772.0000,1193431.0000,1196928.0000,1193863.0000,1193622.0000,1202229.0000,1197369.0000,1193251.0000,1190516.0000,1199173.0000,1191528.0000,1190846.0000,1196828.0000,1192319.0000,1194825.0000,1199503.0000,1189895.0000,1192150.0000,1194373.0000,1197781.0000,1193482.0000,1188692.0000,1196998.0000,1188763.0000,1191698.0000,1195536.0000,1192290.0000,1191218.0000,1190326.0000,1201527.0000,1194263.0000,1198601.0000,1211916.0000,1197198.0000,1191818.0000,1200244.0000,1192470.0000,1192109.0000,1192580.0000,1193231.0000,1194032.0000,1193372.0000,1200355.0000,1191939.0000,1189655.0000,1199203.0000,1072993.0000,1018400.0000" +generating large command graphs for N nodes - 16,wave_sim 
topology,100,1,382308900,4072216.0200,4015796.8900,4116895.6600,254970.2549,211802.4677,293104.8155,"3514063.0000,3510927.0000,4233979.0000,4228128.0000,4207108.0000,4205635.0000,4205295.0000,4236523.0000,4210375.0000,4215985.0000,4202950.0000,4201066.0000,4206476.0000,4187641.0000,4194363.0000,4192400.0000,4200375.0000,4205584.0000,4205745.0000,4192050.0000,4209662.0000,4191017.0000,4198872.0000,4178955.0000,4198532.0000,4182962.0000,4182551.0000,4196328.0000,4195305.0000,4170178.0000,4209522.0000,4186509.0000,4193061.0000,4177631.0000,4202890.0000,4182180.0000,4199032.0000,4200125.0000,4192570.0000,4208441.0000,3772093.0000,3522048.0000,3505978.0000,3648448.0000,4206227.0000,4211596.0000,4213039.0000,4244558.0000,4220332.0000,4228599.0000,4201397.0000,4196888.0000,4205244.0000,4180026.0000,3894715.0000,3517951.0000,3519193.0000,3535604.0000,3499465.0000,3488505.0000,3509094.0000,3510377.0000,3514695.0000,4002600.0000,4216085.0000,4202148.0000,4214071.0000,4223809.0000,4206536.0000,4183063.0000,4207638.0000,4185387.0000,4202890.0000,4200134.0000,4209964.0000,4196117.0000,4212277.0000,4185697.0000,4216486.0000,4187400.0000,4194754.0000,4192310.0000,4188693.0000,4214021.0000,4200575.0000,4176299.0000,4202689.0000,4201948.0000,4184285.0000,4182110.0000,4191869.0000,3692351.0000,3507261.0000,3537328.0000,4020293.0000,4186269.0000,4206777.0000,4193923.0000,3822398.0000,3514084.0000" +generating large command graphs for N nodes - 16,jacobi 
topology,100,1,229150200,2370602.3600,2345252.5600,2386710.9400,101334.5064,72217.7801,131822.9546,"2403346.0000,2402936.0000,2415420.0000,2410540.0000,2396473.0000,2410701.0000,2398628.0000,2414247.0000,2407384.0000,2396925.0000,2404680.0000,2395301.0000,2407094.0000,2410200.0000,2406893.0000,2404618.0000,2399699.0000,2401754.0000,2403948.0000,2390913.0000,2399850.0000,2394621.0000,2397736.0000,2396133.0000,2398978.0000,2417423.0000,2394781.0000,2397986.0000,2395842.0000,2394229.0000,2407946.0000,2401343.0000,2406022.0000,2401343.0000,2397415.0000,2406082.0000,2398467.0000,2400862.0000,2402515.0000,2401994.0000,2408677.0000,2395452.0000,2399980.0000,2401954.0000,2395271.0000,2404509.0000,2384642.0000,2393037.0000,2401853.0000,2394429.0000,2420840.0000,2391705.0000,2401042.0000,2403246.0000,2398949.0000,2412233.0000,2396032.0000,2403317.0000,2402014.0000,2390902.0000,2030830.0000,2030970.0000,2066719.0000,2022053.0000,2028074.0000,2025249.0000,2038815.0000,2013908.0000,2245136.0000,2393247.0000,2396463.0000,2404398.0000,2398377.0000,2400311.0000,2387957.0000,2404890.0000,2395151.0000,2390772.0000,2405050.0000,2397787.0000,2406342.0000,2410380.0000,2399199.0000,2414368.0000,2393869.0000,2405871.0000,2411041.0000,2399369.0000,2407354.0000,2397615.0000,2405691.0000,2412955.0000,2401273.0000,2408386.0000,2389099.0000,2405290.0000,2402235.0000,2403797.0000,2414869.0000,2401724.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,soup 
topology,100,1,156905800,1672275.6500,1657140.5900,1679360.0400,50534.4147,26450.4528,80208.4600,"1689562.0000,1687218.0000,1660708.0000,1684002.0000,1687789.0000,1678652.0000,1693691.0000,1683070.0000,1680605.0000,1680776.0000,1675717.0000,1684934.0000,1680165.0000,1687559.0000,1687288.0000,1681056.0000,1683982.0000,1674474.0000,1683421.0000,1678021.0000,1678973.0000,1688110.0000,1688130.0000,1687569.0000,1679264.0000,1684343.0000,1676538.0000,1679925.0000,1685836.0000,1677129.0000,1687399.0000,1680856.0000,1677220.0000,1679524.0000,1679243.0000,1690214.0000,1679804.0000,1683030.0000,1678031.0000,1679002.0000,1680956.0000,1679544.0000,1687830.0000,1679514.0000,1687579.0000,1681207.0000,1678883.0000,1686978.0000,1680375.0000,1684763.0000,1681908.0000,1675606.0000,1684092.0000,1682961.0000,1687328.0000,1684823.0000,1694252.0000,1671158.0000,1677711.0000,1688020.0000,1684623.0000,1691716.0000,1680605.0000,1683932.0000,1676228.0000,1675556.0000,1681186.0000,1679172.0000,1687889.0000,1682470.0000,1673853.0000,1689943.0000,1676297.0000,1680746.0000,1678542.0000,1688030.0000,1681627.0000,1679634.0000,1693981.0000,1675716.0000,1680195.0000,1684042.0000,1680045.0000,1682509.0000,1682759.0000,1686056.0000,1682078.0000,1682739.0000,1674924.0000,1684092.0000,1683451.0000,1677480.0000,1423939.0000,1390064.0000,1375076.0000,1554897.0000,1679393.0000,1689202.0000,1678342.0000,1676218.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,chain 
topology,100,1,12212300,123916.1700,122278.3000,125814.5000,8936.1128,6924.1511,13784.0171,"113573.0000,108744.0000,174890.0000,144823.0000,130605.0000,128842.0000,128111.0000,126318.0000,126949.0000,127149.0000,126818.0000,126729.0000,126678.0000,126679.0000,126959.0000,126478.0000,126418.0000,126358.0000,126408.0000,126828.0000,126157.0000,125997.0000,126648.0000,126217.0000,126428.0000,126138.0000,126387.0000,126357.0000,126448.0000,126629.0000,126358.0000,125917.0000,126117.0000,126598.0000,126477.0000,131437.0000,126578.0000,126447.0000,125977.0000,125867.0000,126257.0000,125947.0000,126167.0000,125757.0000,126528.0000,125817.0000,126067.0000,126168.0000,126297.0000,126127.0000,126288.0000,125937.0000,126147.0000,126017.0000,126097.0000,125917.0000,126348.0000,125757.0000,126277.0000,126318.0000,126027.0000,126217.0000,125847.0000,126318.0000,125917.0000,125586.0000,131458.0000,126418.0000,126358.0000,126598.0000,126498.0000,125677.0000,126508.0000,125977.0000,126188.0000,126678.0000,126118.0000,126287.0000,126067.0000,125827.0000,125977.0000,126368.0000,131818.0000,114445.0000,109375.0000,109165.0000,108675.0000,108544.0000,108604.0000,108243.0000,108714.0000,108093.0000,108454.0000,108273.0000,108273.0000,107973.0000,108254.0000,108263.0000,107933.0000,107833.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,18074000,185404.8600,185109.9700,185943.1600,1975.2024,1263.4182,3233.8353,"185360.0000,183957.0000,198113.0000,187964.0000,185731.0000,185590.0000,185500.0000,189467.0000,187283.0000,186762.0000,186742.0000,184869.0000,186031.0000,184257.0000,185299.0000,186352.0000,185931.0000,185760.0000,185901.0000,184388.0000,184689.0000,185029.0000,184939.0000,185249.0000,185469.0000,184057.0000,184588.0000,185530.0000,185761.0000,190299.0000,185370.0000,185560.0000,185420.0000,184708.0000,185920.0000,184799.0000,185089.0000,184048.0000,184848.0000,183846.0000,184629.0000,185410.0000,184377.0000,184678.0000,184598.0000,185330.0000,185580.0000,184488.0000,184729.0000,185710.0000,183416.0000,194366.0000,185870.0000,185430.0000,184408.0000,185460.0000,184288.0000,184588.0000,184869.0000,183977.0000,184619.0000,184788.0000,183666.0000,183937.0000,182875.0000,184798.0000,184318.0000,184789.0000,184358.0000,184347.0000,184047.0000,184187.0000,189086.0000,184338.0000,184448.0000,184538.0000,184618.0000,185560.0000,185239.0000,184688.0000,183657.0000,184377.0000,185570.0000,185971.0000,185780.0000,185159.0000,184468.0000,185279.0000,186111.0000,185870.0000,184679.0000,184688.0000,184568.0000,185600.0000,189317.0000,184067.0000,184698.0000,185350.0000,184548.0000,184809.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,21292800,220399.8100,220061.1900,220924.4100,2115.3484,1518.9081,3113.0591,"219114.0000,219725.0000,228652.0000,226237.0000,218833.0000,218573.0000,216799.0000,217601.0000,218402.0000,219524.0000,219234.0000,218072.0000,218072.0000,218302.0000,220757.0000,220867.0000,220066.0000,219174.0000,219755.0000,220757.0000,220036.0000,225916.0000,220557.0000,220416.0000,219855.0000,219294.0000,219885.0000,221027.0000,219324.0000,217841.0000,219635.0000,219264.0000,218593.0000,220616.0000,220366.0000,220897.0000,220556.0000,220436.0000,220306.0000,218973.0000,232048.0000,219334.0000,221268.0000,219915.0000,219765.0000,219354.0000,219714.0000,220908.0000,220346.0000,220877.0000,219995.0000,220978.0000,221287.0000,220586.0000,219895.0000,220276.0000,219926.0000,220235.0000,226487.0000,221008.0000,219033.0000,220065.0000,220336.0000,220457.0000,220276.0000,218873.0000,220536.0000,220165.0000,220266.0000,218804.0000,219935.0000,220146.0000,220075.0000,218993.0000,219695.0000,219414.0000,224965.0000,219966.0000,220656.0000,220226.0000,221008.0000,219825.0000,220175.0000,218863.0000,219755.0000,220917.0000,219154.0000,219795.0000,221037.0000,220887.0000,221007.0000,220446.0000,220036.0000,220226.0000,225245.0000,220697.0000,220897.0000,219445.0000,221488.0000,219685.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,107717900,1058674.1100,1046704.7700,1067852.8100,53253.7222,43229.2222,62316.8293,"930643.0000,932276.0000,1111938.0000,1091999.0000,1085327.0000,1080197.0000,974166.0000,936795.0000,943798.0000,932677.0000,935512.0000,933990.0000,942857.0000,930513.0000,937226.0000,934380.0000,928650.0000,1076119.0000,1081750.0000,1085106.0000,1084064.0000,1084836.0000,1084004.0000,1080307.0000,1098802.0000,1083733.0000,1081088.0000,1080999.0000,1094303.0000,1080587.0000,1083073.0000,1095496.0000,1081489.0000,1084364.0000,1080919.0000,1085537.0000,1079115.0000,1078925.0000,1081930.0000,1084315.0000,1077572.0000,1079285.0000,1084445.0000,1084596.0000,1084715.0000,1078313.0000,1083904.0000,1075078.0000,1075949.0000,1075197.0000,1082611.0000,1080748.0000,1081239.0000,1085807.0000,1078714.0000,1078293.0000,1079166.0000,1083573.0000,1080087.0000,1078984.0000,1079976.0000,1088392.0000,1085517.0000,1080067.0000,1077311.0000,1089775.0000,1077501.0000,1078152.0000,1081839.0000,1077802.0000,1079817.0000,1079355.0000,1083964.0000,1076701.0000,1078303.0000,1076851.0000,1084084.0000,1079245.0000,1077972.0000,1086489.0000,1078433.0000,1080398.0000,1084275.0000,1088122.0000,1078944.0000,1078854.0000,1079946.0000,1085848.0000,1081169.0000,1083002.0000,1083914.0000,1078894.0000,1080227.0000,1079095.0000,1087310.0000,1078483.0000,1015764.0000,936865.0000,954037.0000,932637.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,34494500,352429.4300,352065.6200,352996.4300,2282.0041,1652.2592,3319.9950,"350763.0000,352036.0000,362185.0000,353068.0000,351425.0000,352667.0000,352767.0000,353599.0000,353218.0000,352798.0000,353648.0000,353308.0000,356254.0000,352286.0000,351475.0000,352006.0000,350774.0000,351334.0000,351324.0000,350543.0000,351876.0000,352316.0000,350502.0000,351374.0000,356935.0000,352076.0000,351294.0000,351746.0000,351925.0000,352607.0000,353258.0000,351966.0000,351755.0000,353198.0000,351745.0000,357045.0000,353309.0000,352146.0000,351475.0000,352216.0000,351004.0000,351104.0000,352156.0000,352126.0000,352116.0000,350874.0000,357446.0000,353048.0000,351725.0000,352657.0000,352356.0000,352818.0000,351695.0000,351194.0000,351524.0000,351534.0000,351544.0000,351064.0000,364599.0000,351705.0000,352597.0000,350974.0000,352326.0000,350162.0000,352356.0000,349521.0000,351515.0000,350703.0000,350422.0000,358398.0000,352126.0000,352045.0000,352496.0000,351184.0000,351344.0000,350903.0000,351846.0000,351244.0000,349841.0000,350714.0000,356314.0000,353458.0000,350502.0000,351314.0000,351004.0000,351344.0000,351234.0000,351895.0000,350633.0000,351805.0000,351395.0000,353959.0000,358528.0000,353468.0000,352386.0000,352727.0000,351324.0000,351645.0000,351445.0000,351315.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,soup 
topology,100,1,136321500,1282751.9100,1255665.2900,1308757.9700,135906.0742,128604.3718,144242.8327,"1396296.0000,1388481.0000,1245350.0000,1223979.0000,1247535.0000,1218910.0000,1243056.0000,1224571.0000,1230291.0000,1533918.0000,1252003.0000,1108090.0000,1091919.0000,1130984.0000,1173263.0000,1086599.0000,1125433.0000,1122387.0000,1113661.0000,1132917.0000,1107839.0000,1125824.0000,1268874.0000,1400184.0000,1391047.0000,1388041.0000,1394493.0000,1410092.0000,1394513.0000,1453445.0000,1421525.0000,1377852.0000,1411135.0000,1422887.0000,1418519.0000,1428287.0000,1385176.0000,1419020.0000,1399012.0000,1425041.0000,1390916.0000,1452473.0000,1272622.0000,1133678.0000,1093822.0000,1097650.0000,1115394.0000,1106707.0000,1113871.0000,1192991.0000,1090397.0000,1128769.0000,1150981.0000,1099623.0000,1119151.0000,1116075.0000,1092189.0000,1150721.0000,1119642.0000,1084906.0000,1142515.0000,1117959.0000,1147384.0000,1117087.0000,1091008.0000,1152694.0000,1135873.0000,1117107.0000,1118099.0000,1241272.0000,1394403.0000,1393491.0000,1424490.0000,1391708.0000,1396687.0000,1424730.0000,1382771.0000,1397489.0000,1337546.0000,1427886.0000,1398070.0000,1387029.0000,1389764.0000,1395104.0000,1424350.0000,1419822.0000,1390375.0000,1398029.0000,1392338.0000,1394262.0000,1390976.0000,1401115.0000,1413950.0000,1396076.0000,1421955.0000,1427205.0000,1416725.0000,1394042.0000,1422666.0000,1394112.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,chain 
topology,100,1,25499600,331976.1100,327589.7000,337193.3100,24368.7281,20677.6954,29165.6516,"315757.0000,318994.0000,413172.0000,347938.0000,375280.0000,346696.0000,379628.0000,375931.0000,407721.0000,345273.0000,376873.0000,354190.0000,345052.0000,375941.0000,377163.0000,349411.0000,346586.0000,347246.0000,382233.0000,346575.0000,348689.0000,347017.0000,375680.0000,376392.0000,380069.0000,377925.0000,347507.0000,316749.0000,318603.0000,319364.0000,349932.0000,318692.0000,319784.0000,319023.0000,324323.0000,341005.0000,319585.0000,350773.0000,319073.0000,320115.0000,315727.0000,318812.0000,321177.0000,319805.0000,320045.0000,319203.0000,316187.0000,317039.0000,294738.0000,319895.0000,318763.0000,316399.0000,318953.0000,317761.0000,291991.0000,319363.0000,319123.0000,319885.0000,316358.0000,320265.0000,291430.0000,319664.0000,320085.0000,347267.0000,316829.0000,320045.0000,321168.0000,317841.0000,322079.0000,318272.0000,317681.0000,318372.0000,321588.0000,318442.0000,319053.0000,322640.0000,314164.0000,318893.0000,379548.0000,319263.0000,317911.0000,319975.0000,318342.0000,317611.0000,351124.0000,318282.0000,319615.0000,319744.0000,316167.0000,318782.0000,292082.0000,320756.0000,318452.0000,320406.0000,315657.0000,320266.0000,347698.0000,319284.0000,322420.0000,319264.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,36556500,398219.5000,392299.7500,403646.2500,28857.1713,25327.1053,32280.2622,"407892.0000,395268.0000,399556.0000,434021.0000,406790.0000,406669.0000,405677.0000,406749.0000,404525.0000,408563.0000,425415.0000,419143.0000,401088.0000,401890.0000,396500.0000,397893.0000,400648.0000,395358.0000,403083.0000,396400.0000,426968.0000,398574.0000,401349.0000,395538.0000,401790.0000,398213.0000,399737.0000,342157.0000,373637.0000,371543.0000,344201.0000,345934.0000,346415.0000,343720.0000,345253.0000,348219.0000,346826.0000,344111.0000,344402.0000,346565.0000,347527.0000,345703.0000,371352.0000,375490.0000,348068.0000,349331.0000,348139.0000,346776.0000,350894.0000,350824.0000,402592.0000,407922.0000,410326.0000,403304.0000,407521.0000,433441.0000,406379.0000,407772.0000,403995.0000,404586.0000,437598.0000,435404.0000,406349.0000,437007.0000,403924.0000,405246.0000,436205.0000,408814.0000,433410.0000,436706.0000,408001.0000,402692.0000,407461.0000,435374.0000,406099.0000,434232.0000,407401.0000,407612.0000,405016.0000,434462.0000,435825.0000,437709.0000,433170.0000,405778.0000,410396.0000,404054.0000,407330.0000,435645.0000,435935.0000,433720.0000,404264.0000,405347.0000,406660.0000,439401.0000,406268.0000,405757.0000,406579.0000,401891.0000,409234.0000,407752.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,38580600,335595.4000,326510.5300,346885.1500,51698.9800,44445.0815,61989.7859,"298303.0000,304546.0000,422679.0000,412570.0000,411228.0000,437017.0000,418422.0000,417139.0000,417851.0000,447958.0000,420906.0000,406519.0000,421437.0000,425245.0000,414575.0000,436927.0000,423511.0000,414204.0000,522178.0000,455872.0000,420666.0000,432659.0000,433250.0000,401590.0000,321999.0000,297152.0000,292092.0000,327198.0000,304476.0000,307281.0000,311950.0000,329183.0000,320716.0000,308243.0000,299335.0000,287483.0000,310958.0000,289467.0000,298504.0000,295068.0000,303564.0000,293154.0000,314615.0000,301129.0000,294386.0000,302923.0000,304636.0000,322770.0000,314886.0000,302993.0000,333561.0000,310988.0000,295328.0000,316378.0000,308122.0000,382895.0000,303604.0000,313522.0000,292223.0000,304676.0000,310076.0000,318302.0000,313943.0000,319905.0000,314264.0000,305167.0000,311810.0000,304937.0000,300959.0000,311649.0000,327359.0000,303614.0000,317410.0000,311950.0000,313663.0000,312581.0000,297563.0000,313653.0000,297622.0000,305136.0000,324162.0000,297432.0000,328501.0000,309906.0000,320647.0000,302983.0000,313332.0000,310948.0000,296761.0000,296069.0000,306400.0000,318542.0000,303223.0000,318022.0000,324243.0000,296851.0000,321618.0000,315547.0000,316969.0000,291111.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,108237800,1095232.7900,1066525.6000,1139158.9900,178500.9506,126847.0472,239922.5250,"933449.0000,924662.0000,1735169.0000,1689111.0000,1744146.0000,1740930.0000,1709120.0000,1268384.0000,1085326.0000,1073614.0000,1118590.0000,1069617.0000,1069496.0000,1081028.0000,1105805.0000,1063285.0000,1108641.0000,1102930.0000,1078013.0000,1102569.0000,1074886.0000,1102298.0000,1068264.0000,1104083.0000,1081950.0000,1097610.0000,1080227.0000,1101106.0000,1070679.0000,1069827.0000,1165809.0000,1100084.0000,1103681.0000,1077261.0000,1096518.0000,1113280.0000,1066110.0000,1080667.0000,1058185.0000,1075138.0000,1130222.0000,1074136.0000,1105224.0000,1109753.0000,1077411.0000,1071179.0000,1079646.0000,1128579.0000,1080588.0000,1101827.0000,1073153.0000,1073323.0000,1105084.0000,1071861.0000,1076780.0000,1072722.0000,1096267.0000,1081800.0000,1073073.0000,1075869.0000,1124221.0000,1077582.0000,1078284.0000,1100746.0000,1106367.0000,1075358.0000,1071570.0000,1059588.0000,1071932.0000,1049869.0000,1074366.0000,1071971.0000,1086359.0000,1075829.0000,1086519.0000,1074867.0000,1089054.0000,1072342.0000,1095266.0000,1866809.0000,970639.0000,955300.0000,951914.0000,927888.0000,937386.0000,934801.0000,934660.0000,952785.0000,928610.0000,936144.0000,941514.0000,941354.0000,936605.0000,934872.0000,939039.0000,933428.0000,917148.0000,936996.0000,926736.0000,926516.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread,jacobi 
topology,100,1,49138900,487244.2900,473092.3300,505442.3100,81480.8257,66940.3722,96892.5540,"429132.0000,436115.0000,641284.0000,625455.0000,619563.0000,616367.0000,639451.0000,678906.0000,696640.0000,699745.0000,694395.0000,699014.0000,695788.0000,695837.0000,667224.0000,724983.0000,487482.0000,489737.0000,497091.0000,509133.0000,536205.0000,463507.0000,493404.0000,465240.0000,495006.0000,492542.0000,462576.0000,466964.0000,492432.0000,463758.0000,523110.0000,493003.0000,464850.0000,493585.0000,463988.0000,492352.0000,494867.0000,464138.0000,464118.0000,492011.0000,466383.0000,464259.0000,491981.0000,465521.0000,460702.0000,496570.0000,462124.0000,519222.0000,526516.0000,462916.0000,463968.0000,493313.0000,525595.0000,488254.0000,500938.0000,461814.0000,494916.0000,461283.0000,465972.0000,465692.0000,493364.0000,463267.0000,463067.0000,465060.0000,435494.0000,427709.0000,417640.0000,427679.0000,427980.0000,419705.0000,407110.0000,424985.0000,426256.0000,415856.0000,416838.0000,434081.0000,426838.0000,424483.0000,417640.0000,429763.0000,419935.0000,444321.0000,423121.0000,422098.0000,427179.0000,424654.0000,410997.0000,428130.0000,421938.0000,438440.0000,414805.0000,436175.0000,423491.0000,418292.0000,421938.0000,415206.0000,398144.0000,404185.0000,456093.0000,451535.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,266087500,2611775.5400,2584588.0000,2634637.8900,126766.0803,110364.6052,139357.7735,"2381205.0000,2674501.0000,2700701.0000,2682316.0000,2685111.0000,2685131.0000,2682306.0000,2688137.0000,2680161.0000,2683749.0000,2689539.0000,2683167.0000,2673569.0000,2680733.0000,2687726.0000,2679631.0000,2688547.0000,2674160.0000,2684771.0000,2674691.0000,2674040.0000,2678899.0000,2684430.0000,2687265.0000,2684198.0000,2687766.0000,2683568.0000,2677456.0000,2679781.0000,2685872.0000,2680482.0000,2699879.0000,2684280.0000,2681875.0000,2685993.0000,2687455.0000,2678068.0000,2681985.0000,2689950.0000,2679730.0000,2518465.0000,2397035.0000,2385202.0000,2386916.0000,2378700.0000,2390001.0000,2392486.0000,2385032.0000,2402725.0000,2387736.0000,2389901.0000,2386264.0000,2384721.0000,2537371.0000,2686043.0000,2688207.0000,2693356.0000,2687105.0000,2682165.0000,2688968.0000,2681755.0000,2690581.0000,2685722.0000,2681945.0000,2694559.0000,2685892.0000,2694639.0000,2676865.0000,2692635.0000,2695260.0000,2681153.0000,2710850.0000,2695331.0000,2688637.0000,2688828.0000,2681604.0000,2684219.0000,2687736.0000,2685562.0000,2687715.0000,2695120.0000,2688247.0000,2681104.0000,2682165.0000,2689800.0000,2684360.0000,2691212.0000,2702694.0000,2691894.0000,2456848.0000,2385282.0000,2385042.0000,2389991.0000,2387977.0000,2385403.0000,2388679.0000,2385082.0000,2388428.0000,2382427.0000,2383088.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,41837500,413897.7200,412131.5000,415433.5300,8357.3488,7506.0232,8973.5390,"400728.0000,400718.0000,409344.0000,401971.0000,400858.0000,401270.0000,401059.0000,400648.0000,401058.0000,400618.0000,405627.0000,400628.0000,400909.0000,400537.0000,400668.0000,400848.0000,400387.0000,400648.0000,400568.0000,400738.0000,418822.0000,419774.0000,419303.0000,419273.0000,418792.0000,419704.0000,418362.0000,419113.0000,419204.0000,422780.0000,419423.0000,419344.0000,419544.0000,419634.0000,418762.0000,419003.0000,418953.0000,418652.0000,419013.0000,419724.0000,419364.0000,419744.0000,419364.0000,419203.0000,418723.0000,418983.0000,419223.0000,419584.0000,422530.0000,418842.0000,419604.0000,418973.0000,418562.0000,418522.0000,419183.0000,418662.0000,418802.0000,419263.0000,419985.0000,419915.0000,420115.0000,419915.0000,419203.0000,419634.0000,418692.0000,418411.0000,418933.0000,419554.0000,420526.0000,419574.0000,419003.0000,419453.0000,419193.0000,419243.0000,419103.0000,418622.0000,419354.0000,419814.0000,420075.0000,419394.0000,419654.0000,418372.0000,419033.0000,419283.0000,419413.0000,418883.0000,420035.0000,419844.0000,423291.0000,406530.0000,401128.0000,400507.0000,400328.0000,400718.0000,400728.0000,400528.0000,406980.0000,401259.0000,400989.0000,400418.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,48335600,489515.3000,489120.5800,490216.2800,2617.2425,1726.0216,4193.0842,"488455.0000,487934.0000,500146.0000,489347.0000,488705.0000,489707.0000,490629.0000,489546.0000,488756.0000,488404.0000,488485.0000,488745.0000,490037.0000,488955.0000,496259.0000,489747.0000,488274.0000,489126.0000,489256.0000,488735.0000,487352.0000,489607.0000,497993.0000,489937.0000,489046.0000,489837.0000,489817.0000,489416.0000,489166.0000,488515.0000,505957.0000,489186.0000,488905.0000,487523.0000,488134.0000,489016.0000,488044.0000,488494.0000,495447.0000,490969.0000,487663.0000,488234.0000,488044.0000,488645.0000,488615.0000,489657.0000,489316.0000,489476.0000,487964.0000,488645.0000,487784.0000,488033.0000,488554.0000,488685.0000,488133.0000,491249.0000,489216.0000,488776.0000,487743.0000,487973.0000,489716.0000,489577.0000,489176.0000,489757.0000,489988.0000,488585.0000,489546.0000,489396.0000,488104.0000,488635.0000,489416.0000,490298.0000,489005.0000,488846.0000,487863.0000,488134.0000,488124.0000,488695.0000,488054.0000,489867.0000,489417.0000,488524.0000,489497.0000,488655.0000,487653.0000,489056.0000,489156.0000,495478.0000,489947.0000,489897.0000,489256.0000,487052.0000,488044.0000,488033.0000,488845.0000,488324.0000,493705.0000,489406.0000,488465.0000,488324.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,51720200,521666.6700,519516.9600,523149.4000,9005.1085,6708.5787,11375.3898,"494065.0000,492813.0000,533009.0000,524112.0000,524262.0000,522088.0000,529763.0000,523501.0000,522680.0000,523941.0000,523881.0000,523091.0000,522459.0000,524302.0000,527980.0000,523742.0000,523962.0000,523210.0000,522109.0000,524062.0000,524182.0000,524823.0000,523451.0000,524553.0000,523832.0000,523721.0000,523692.0000,525154.0000,523942.0000,529703.0000,523942.0000,523861.0000,524183.0000,524523.0000,524834.0000,525014.0000,523481.0000,527458.0000,524704.0000,523851.0000,524462.0000,523692.0000,523000.0000,523831.0000,524302.0000,523431.0000,523221.0000,523621.0000,522970.0000,524603.0000,524543.0000,523671.0000,529442.0000,523751.0000,524763.0000,523521.0000,523901.0000,524724.0000,523100.0000,524072.0000,529262.0000,524133.0000,524362.0000,526065.0000,523681.0000,522669.0000,523140.0000,524904.0000,524473.0000,524412.0000,523070.0000,523190.0000,524193.0000,522489.0000,524763.0000,525535.0000,524343.0000,523922.0000,522879.0000,522970.0000,523561.0000,522729.0000,538971.0000,524382.0000,524062.0000,522960.0000,523922.0000,523100.0000,523301.0000,524303.0000,528891.0000,525084.0000,510466.0000,494547.0000,493654.0000,493173.0000,494606.0000,494265.0000,496099.0000,495579.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,312661700,3089699.4100,3075547.6200,3102408.6000,68614.0589,59135.1451,82151.8841,"2962237.0000,2967346.0000,2990460.0000,2968879.0000,2975291.0000,2964120.0000,3092624.0000,3129204.0000,3124925.0000,3118303.0000,3120286.0000,3119265.0000,3115047.0000,3117340.0000,3118063.0000,3065131.0000,2963098.0000,2964922.0000,2960012.0000,2963158.0000,2965442.0000,2970142.0000,2959461.0000,2968699.0000,2966364.0000,3119345.0000,3127550.0000,3124434.0000,3126588.0000,3122070.0000,3122100.0000,3123082.0000,3124544.0000,3128883.0000,3122050.0000,3119445.0000,3326838.0000,3126548.0000,3127430.0000,3126699.0000,3120777.0000,3118061.0000,3123883.0000,3124575.0000,3120266.0000,3125537.0000,3121578.0000,3123362.0000,3140344.0000,3119564.0000,3122360.0000,3120677.0000,3121889.0000,3117651.0000,3121018.0000,3118613.0000,3120447.0000,3116329.0000,3118413.0000,3123663.0000,3121318.0000,3124955.0000,3116208.0000,3119925.0000,3117932.0000,3123222.0000,3124985.0000,3117470.0000,3121700.0000,3128231.0000,3125026.0000,3126538.0000,3119655.0000,3118894.0000,3124595.0000,3121258.0000,3116951.0000,3122250.0000,3123683.0000,3127921.0000,3127059.0000,3122531.0000,3130095.0000,3129393.0000,3126839.0000,3118994.0000,3123211.0000,3121058.0000,3125427.0000,3124644.0000,3122161.0000,3126117.0000,3123222.0000,2975252.0000,2963899.0000,2963750.0000,2972877.0000,2968348.0000,2962156.0000,2963759.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,83223800,826832.1800,822060.9000,832117.7100,25621.2665,23742.0416,30520.6072,"804775.0000,805165.0000,860109.0000,855210.0000,855551.0000,860120.0000,857054.0000,856512.0000,856472.0000,856333.0000,858146.0000,855340.0000,855651.0000,856091.0000,855160.0000,860881.0000,855941.0000,855150.0000,855621.0000,860490.0000,836745.0000,807149.0000,806017.0000,804914.0000,808581.0000,805236.0000,804213.0000,804073.0000,804594.0000,808823.0000,804894.0000,804093.0000,805586.0000,805115.0000,812148.0000,805786.0000,804625.0000,805416.0000,805275.0000,810646.0000,805306.0000,805115.0000,806387.0000,804614.0000,809203.0000,805606.0000,805095.0000,804634.0000,803082.0000,827888.0000,856593.0000,856262.0000,855821.0000,855711.0000,862644.0000,857715.0000,856643.0000,856082.0000,857374.0000,847285.0000,807280.0000,804925.0000,804815.0000,813801.0000,805706.0000,805425.0000,805656.0000,806447.0000,809162.0000,805676.0000,804845.0000,804915.0000,805145.0000,821446.0000,856522.0000,856924.0000,857244.0000,855430.0000,912990.0000,856152.0000,855390.0000,854960.0000,855561.0000,861743.0000,808592.0000,805335.0000,805025.0000,819893.0000,806608.0000,805997.0000,804113.0000,805536.0000,806799.0000,805917.0000,818510.0000,806077.0000,805316.0000,811407.0000,805987.0000,805185.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,244515100,2443104.8200,2420960.4800,2460521.0900,99371.7737,77550.0342,122028.0368,"2180584.0000,2150897.0000,2510860.0000,2475132.0000,2549544.0000,2498657.0000,2496112.0000,2467689.0000,2492516.0000,2472367.0000,2462469.0000,2480383.0000,2449373.0000,2480973.0000,2462980.0000,2414107.0000,2464843.0000,2450446.0000,2477788.0000,2470644.0000,2477477.0000,2480924.0000,2461377.0000,2466426.0000,2469602.0000,2470364.0000,2470074.0000,2471446.0000,2662218.0000,2694930.0000,2622472.0000,2473419.0000,2476586.0000,2438182.0000,2463511.0000,2456908.0000,2463321.0000,2461648.0000,2524676.0000,2462759.0000,2478329.0000,2476094.0000,2463600.0000,2475042.0000,2471045.0000,2480413.0000,2441750.0000,2504068.0000,2465875.0000,2461908.0000,2455496.0000,2448102.0000,2452239.0000,2451057.0000,2442651.0000,2480423.0000,2446969.0000,2469873.0000,2444435.0000,2495652.0000,2501703.0000,2477086.0000,2473669.0000,2449995.0000,2450146.0000,2495181.0000,2463530.0000,2470664.0000,2466647.0000,2470685.0000,2461777.0000,2471325.0000,2465084.0000,2494469.0000,2445967.0000,2457530.0000,2449544.0000,2462268.0000,2469011.0000,2442160.0000,2462599.0000,2475292.0000,2454724.0000,2448922.0000,2458300.0000,2474421.0000,2462920.0000,2449685.0000,2473940.0000,2469983.0000,2488098.0000,2237271.0000,2180984.0000,2187768.0000,2174393.0000,2183048.0000,2185613.0000,2157971.0000,2185844.0000,2178560.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,49613700,463588.7900,455796.0800,472344.7300,41924.6378,37755.7853,46435.3809,"424253.0000,429072.0000,548458.0000,526466.0000,523270.0000,490749.0000,494456.0000,550822.0000,553448.0000,492282.0000,494546.0000,518191.0000,553999.0000,492802.0000,523160.0000,521748.0000,494977.0000,491610.0000,493625.0000,518131.0000,523060.0000,521718.0000,523220.0000,521166.0000,524393.0000,550021.0000,492302.0000,526607.0000,518953.0000,492983.0000,494907.0000,519894.0000,553859.0000,523371.0000,493163.0000,521958.0000,431146.0000,448850.0000,455743.0000,457686.0000,459840.0000,460261.0000,426136.0000,430214.0000,434843.0000,416298.0000,415436.0000,426327.0000,435484.0000,434452.0000,435794.0000,451143.0000,444821.0000,415967.0000,428060.0000,436877.0000,415466.0000,432318.0000,413091.0000,434533.0000,434532.0000,435434.0000,417590.0000,432468.0000,427970.0000,445714.0000,429693.0000,439261.0000,448639.0000,463787.0000,442948.0000,429893.0000,435023.0000,429773.0000,425806.0000,467034.0000,435564.0000,425756.0000,414204.0000,438409.0000,451214.0000,429333.0000,466583.0000,435464.0000,429954.0000,444381.0000,428290.0000,432719.0000,443419.0000,423632.0000,435333.0000,419574.0000,429001.0000,425805.0000,471372.0000,431105.0000,427839.0000,425284.0000,471041.0000,439612.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,55192100,508797.9700,503913.5700,515264.3900,28515.1834,22494.6737,37283.5343,"514654.0000,555201.0000,577423.0000,549360.0000,550472.0000,608522.0000,584236.0000,607921.0000,629142.0000,506549.0000,518672.0000,508993.0000,523230.0000,505396.0000,523992.0000,518000.0000,528189.0000,489366.0000,512921.0000,496851.0000,497482.0000,487332.0000,503132.0000,504335.0000,518101.0000,516037.0000,505717.0000,506058.0000,510576.0000,503824.0000,493254.0000,514774.0000,528250.0000,525014.0000,479758.0000,511608.0000,486370.0000,499335.0000,476852.0000,529683.0000,511899.0000,508623.0000,506529.0000,503624.0000,502240.0000,523581.0000,512961.0000,472935.0000,479488.0000,474899.0000,479608.0000,476562.0000,498924.0000,499917.0000,527979.0000,488846.0000,515997.0000,479066.0000,479998.0000,510787.0000,504836.0000,510456.0000,506810.0000,518782.0000,481972.0000,495438.0000,477844.0000,502482.0000,524513.0000,509144.0000,510166.0000,499606.0000,562394.0000,548819.0000,503734.0000,516387.0000,501799.0000,501520.0000,512160.0000,524372.0000,499726.0000,476562.0000,496750.0000,482504.0000,475109.0000,478656.0000,491350.0000,497802.0000,499015.0000,490408.0000,499165.0000,474949.0000,491411.0000,495117.0000,484777.0000,493744.0000,501479.0000,519794.0000,481791.0000,475409.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,59263800,600065.1400,596230.2900,604273.9300,20494.0892,18352.2382,22860.7785,"606949.0000,610817.0000,611187.0000,606849.0000,611148.0000,608322.0000,638780.0000,610726.0000,607971.0000,608562.0000,610526.0000,583124.0000,606919.0000,611327.0000,605096.0000,583956.0000,579868.0000,607450.0000,582543.0000,621156.0000,641164.0000,580349.0000,580910.0000,580700.0000,606839.0000,580339.0000,642737.0000,605937.0000,582823.0000,607731.0000,579227.0000,580780.0000,582854.0000,581421.0000,607189.0000,580970.0000,636876.0000,584016.0000,608733.0000,582714.0000,577243.0000,578906.0000,638369.0000,613391.0000,606658.0000,582372.0000,578445.0000,637858.0000,579928.0000,582303.0000,580309.0000,580630.0000,581000.0000,608772.0000,580439.0000,637718.0000,580910.0000,637558.0000,583575.0000,612029.0000,605116.0000,580058.0000,581592.0000,581621.0000,608552.0000,637117.0000,582543.0000,608372.0000,608532.0000,581280.0000,581130.0000,608432.0000,582092.0000,637507.0000,608482.0000,609234.0000,580199.0000,580950.0000,579988.0000,578085.0000,610827.0000,580740.0000,580389.0000,581601.0000,580459.0000,640112.0000,640493.0000,605807.0000,612430.0000,608743.0000,607110.0000,641234.0000,577102.0000,611037.0000,610767.0000,581271.0000,607260.0000,639220.0000,578846.0000,584186.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,316034900,3056070.8700,3036844.1900,3075985.6100,99648.0192,84809.2576,133220.9011,"3118232.0000,3074871.0000,3078778.0000,2950444.0000,2922662.0000,2924164.0000,2923493.0000,2905158.0000,2873579.0000,2865593.0000,2867036.0000,2895279.0000,2861665.0000,3033792.0000,3135204.0000,3105929.0000,3105458.0000,3104667.0000,3109416.0000,3130515.0000,3044303.0000,3108644.0000,3109806.0000,3056836.0000,3137639.0000,3182043.0000,3113483.0000,3082584.0000,3124484.0000,3057308.0000,3129494.0000,3104636.0000,3084468.0000,3097112.0000,3042819.0000,3056025.0000,3148320.0000,3262226.0000,3500167.0000,3123222.0000,3077064.0000,3064791.0000,3050244.0000,3054551.0000,3109706.0000,3101540.0000,3081923.0000,3110708.0000,3132610.0000,3102753.0000,3086562.0000,3100168.0000,3105277.0000,3076514.0000,3081943.0000,3104255.0000,3076393.0000,3104627.0000,3083577.0000,2955023.0000,2874832.0000,2879830.0000,2905669.0000,2899277.0000,2881925.0000,2890821.0000,2893897.0000,2888467.0000,2896091.0000,2896773.0000,2975141.0000,3068668.0000,3108203.0000,3078567.0000,3083516.0000,3134002.0000,3100269.0000,3077414.0000,3103104.0000,3083687.0000,3100979.0000,3182585.0000,3107763.0000,3113824.0000,3084358.0000,3089809.0000,3091973.0000,3110718.0000,3086331.0000,3090320.0000,3110407.0000,3099066.0000,3136999.0000,3080420.0000,3073708.0000,3135565.0000,3014245.0000,2976213.0000,2933313.0000,3056555.0000" +building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,92243600,869813.7900,858670.3900,880878.6600,56456.7181,52348.1756,61830.9042,"809353.0000,819913.0000,940602.0000,930713.0000,987060.0000,929071.0000,931565.0000,957454.0000,957795.0000,953486.0000,929361.0000,927046.0000,896769.0000,901368.0000,958486.0000,897220.0000,902900.0000,900496.0000,898372.0000,900866.0000,899594.0000,898402.0000,898662.0000,899574.0000,988834.0000,927717.0000,830914.0000,821607.0000,801859.0000,801799.0000,845051.0000,829561.0000,810876.0000,828649.0000,804814.0000,801799.0000,792962.0000,813231.0000,795096.0000,780198.0000,801118.0000,813311.0000,781731.0000,789365.0000,783955.0000,813210.0000,790097.0000,798803.0000,810635.0000,804764.0000,798212.0000,816877.0000,789696.0000,837336.0000,792311.0000,807139.0000,822849.0000,815505.0000,855601.0000,902190.0000,833589.0000,830724.0000,793063.0000,799805.0000,805807.0000,813270.0000,821566.0000,795427.0000,899243.0000,904764.0000,903201.0000,903932.0000,896328.0000,899134.0000,900827.0000,898733.0000,929451.0000,901247.0000,927036.0000,902370.0000,899294.0000,900085.0000,896478.0000,901388.0000,899384.0000,902681.0000,897390.0000,928529.0000,901668.0000,956001.0000,900667.0000,899985.0000,899294.0000,898703.0000,896198.0000,936113.0000,927106.0000,924942.0000,903972.0000,926184.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,soup 
topology,100,1,194369800,1949065.2400,1915451.5000,1981062.3900,167863.5928,157350.8855,173728.6522,"2086696.0000,2085173.0000,2094721.0000,2077218.0000,2094170.0000,2078060.0000,1938094.0000,1731162.0000,1741101.0000,1730630.0000,1737513.0000,1723908.0000,1732163.0000,1739156.0000,1727785.0000,1737814.0000,1734668.0000,1743465.0000,1728497.0000,1727415.0000,1727505.0000,1736231.0000,1745519.0000,1733516.0000,1884572.0000,2087206.0000,2082318.0000,2081877.0000,2089411.0000,2079413.0000,2084201.0000,2082979.0000,2087788.0000,2080424.0000,2080284.0000,2083490.0000,2087106.0000,2076526.0000,2085745.0000,2087037.0000,2084261.0000,2083480.0000,2081727.0000,2024097.0000,1732053.0000,1733025.0000,1740590.0000,1725702.0000,1742683.0000,1731633.0000,1840889.0000,2077298.0000,2085153.0000,2075515.0000,2088550.0000,2078119.0000,2130869.0000,2074212.0000,2089963.0000,2080243.0000,2086897.0000,2077939.0000,2088279.0000,2084772.0000,2079502.0000,2078991.0000,2087648.0000,2083440.0000,2080675.0000,2064654.0000,1718107.0000,1735349.0000,1745088.0000,1731893.0000,1742333.0000,1736332.0000,1741040.0000,1734899.0000,1736773.0000,1745990.0000,1730590.0000,1736772.0000,1731311.0000,1741340.0000,2097496.0000,2095132.0000,2084030.0000,2091304.0000,2085153.0000,2087978.0000,2086635.0000,2091995.0000,2085242.0000,2094681.0000,2082779.0000,2087828.0000,2088800.0000,2091255.0000,2076457.0000,2086526.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,chain 
topology,100,1,36034700,381401.1800,380989.0600,381984.0800,2473.1975,1930.9793,3449.5116,"379197.0000,387272.0000,393815.0000,382965.0000,381101.0000,382113.0000,380820.0000,386742.0000,381001.0000,381341.0000,380791.0000,380990.0000,381301.0000,380069.0000,381071.0000,381943.0000,379628.0000,386561.0000,380841.0000,380099.0000,380239.0000,381672.0000,377965.0000,378787.0000,379768.0000,379739.0000,381241.0000,379869.0000,385690.0000,380509.0000,381060.0000,380910.0000,379538.0000,380540.0000,379208.0000,381111.0000,382062.0000,379928.0000,387594.0000,382724.0000,381602.0000,381362.0000,379718.0000,380320.0000,379909.0000,380991.0000,381481.0000,381171.0000,381131.0000,386301.0000,380620.0000,380300.0000,379478.0000,381722.0000,380369.0000,378916.0000,380280.0000,380900.0000,381040.0000,386150.0000,380189.0000,378637.0000,379648.0000,379107.0000,380510.0000,381602.0000,380590.0000,380270.0000,380510.0000,379738.0000,387042.0000,382403.0000,380440.0000,381351.0000,379187.0000,380700.0000,381492.0000,382253.0000,380970.0000,380941.0000,388706.0000,381060.0000,381291.0000,382213.0000,382404.0000,381732.0000,378676.0000,380860.0000,379879.0000,380760.0000,381512.0000,387603.0000,380159.0000,380660.0000,379899.0000,381782.0000,379168.0000,381361.0000,380220.0000,379017.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,expanding tree 
topology,100,1,42932600,438970.1700,433560.6600,443649.8800,25710.9706,22730.6207,27918.2863,"394967.0000,394807.0000,462736.0000,454119.0000,461895.0000,453979.0000,454190.0000,452947.0000,452767.0000,453128.0000,453037.0000,452717.0000,452897.0000,461173.0000,452467.0000,454269.0000,452456.0000,451725.0000,453458.0000,451425.0000,452466.0000,458598.0000,456174.0000,454390.0000,453869.0000,456154.0000,453408.0000,452236.0000,454210.0000,454200.0000,460461.0000,455352.0000,454951.0000,453138.0000,453358.0000,452507.0000,452356.0000,453488.0000,452467.0000,459880.0000,455713.0000,452797.0000,454721.0000,453097.0000,453217.0000,452556.0000,453498.0000,452326.0000,459870.0000,454820.0000,454460.0000,453688.0000,454861.0000,454510.0000,452376.0000,452336.0000,452396.0000,458358.0000,453749.0000,453207.0000,456043.0000,452656.0000,453868.0000,453748.0000,450121.0000,457756.0000,457416.0000,452015.0000,452576.0000,453529.0000,459159.0000,456354.0000,455131.0000,454730.0000,457937.0000,399326.0000,399936.0000,397982.0000,396470.0000,395698.0000,396079.0000,396610.0000,396200.0000,395278.0000,402612.0000,396761.0000,395618.0000,396230.0000,395669.0000,396169.0000,396119.0000,396160.0000,396420.0000,395458.0000,405838.0000,396210.0000,395629.0000,396119.0000,396821.0000,395188.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,contracting tree 
topology,100,1,48997100,495691.4400,495233.9900,496277.5000,2622.5318,2121.5878,3251.1952,"495047.0000,494706.0000,505406.0000,495989.0000,493995.0000,493634.0000,500488.0000,497792.0000,494767.0000,494005.0000,495608.0000,495468.0000,495097.0000,495457.0000,503072.0000,493675.0000,495097.0000,493094.0000,492632.0000,497262.0000,493975.0000,493164.0000,499736.0000,493514.0000,495668.0000,494526.0000,494325.0000,494746.0000,496370.0000,495157.0000,500478.0000,494997.0000,495678.0000,495248.0000,493995.0000,493714.0000,497040.0000,496459.0000,504155.0000,496139.0000,496791.0000,494837.0000,497031.0000,493835.0000,497502.0000,495638.0000,502782.0000,497802.0000,495188.0000,496149.0000,495488.0000,495819.0000,496490.0000,494195.0000,501930.0000,495247.0000,494576.0000,495959.0000,495638.0000,492492.0000,495197.0000,495578.0000,500587.0000,495178.0000,495898.0000,494165.0000,495899.0000,492412.0000,494516.0000,494686.0000,502251.0000,494496.0000,493985.0000,494746.0000,491651.0000,493404.0000,494616.0000,494506.0000,500267.0000,494746.0000,493344.0000,493595.0000,494185.0000,492171.0000,494887.0000,493214.0000,500508.0000,494816.0000,497071.0000,494557.0000,495077.0000,493704.0000,493524.0000,492833.0000,495748.0000,495548.0000,495227.0000,495267.0000,495027.0000,493293.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,wave_sim 
topology,100,1,218275500,2197912.6400,2171435.2800,2218971.4100,120132.5292,100096.3133,137147.9246,"1979943.0000,2183440.0000,2307064.0000,2264563.0000,2259394.0000,2255686.0000,2255375.0000,2257740.0000,2263110.0000,2255946.0000,2250396.0000,2255917.0000,2248863.0000,2251368.0000,2256278.0000,2256017.0000,2252611.0000,2252781.0000,2261047.0000,2257821.0000,2259985.0000,2257981.0000,2252159.0000,2253532.0000,2312665.0000,2256799.0000,2245447.0000,2261197.0000,2251669.0000,2248834.0000,2252129.0000,2259875.0000,2248993.0000,2258662.0000,2257390.0000,2258662.0000,2258662.0000,2251368.0000,2256608.0000,2251178.0000,2257460.0000,2261107.0000,2204049.0000,1951039.0000,1945608.0000,1950578.0000,1945618.0000,1956349.0000,1932734.0000,1942233.0000,1937122.0000,1944807.0000,1947000.0000,1944686.0000,1993169.0000,1937183.0000,1952581.0000,1952521.0000,2220821.0000,2258011.0000,2253743.0000,2256958.0000,2254133.0000,2262039.0000,2253492.0000,2263160.0000,2257981.0000,2255175.0000,2270785.0000,2252761.0000,2264383.0000,2258953.0000,2261538.0000,2250356.0000,2259523.0000,2256398.0000,2251168.0000,2254524.0000,2250847.0000,2263141.0000,2267288.0000,2299600.0000,2255576.0000,2251650.0000,2256348.0000,2250727.0000,2251749.0000,2244505.0000,2248222.0000,2246940.0000,2252340.0000,2256007.0000,2251248.0000,2254844.0000,2252189.0000,2261808.0000,2248924.0000,2003899.0000,1938014.0000,1934497.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation,jacobi 
topology,100,1,79043300,732671.2000,723757.0800,745170.8000,53491.0294,43023.7924,76618.0057,"703793.0000,702490.0000,839330.0000,828118.0000,832927.0000,829020.0000,830684.0000,830253.0000,833990.0000,830644.0000,832587.0000,828460.0000,832237.0000,1008681.0000,721155.0000,705957.0000,706177.0000,704495.0000,710996.0000,706949.0000,704754.0000,705656.0000,704364.0000,705256.0000,740773.0000,726537.0000,725554.0000,727498.0000,726807.0000,728761.0000,717829.0000,703933.0000,706859.0000,703732.0000,707239.0000,717158.0000,706658.0000,706869.0000,707039.0000,706088.0000,708011.0000,717058.0000,709644.0000,708232.0000,709344.0000,707319.0000,707740.0000,740653.0000,707260.0000,709734.0000,707961.0000,706999.0000,713462.0000,706789.0000,706328.0000,705175.0000,707500.0000,708602.0000,714834.0000,707079.0000,704915.0000,705496.0000,705125.0000,713551.0000,705326.0000,703993.0000,702651.0000,706508.0000,705195.0000,761112.0000,830554.0000,829481.0000,828380.0000,826165.0000,835683.0000,782472.0000,705506.0000,706739.0000,704414.0000,718341.0000,703662.0000,704073.0000,703453.0000,702761.0000,703672.0000,712279.0000,701218.0000,703132.0000,702801.0000,700918.0000,701469.0000,706578.0000,702621.0000,703563.0000,700026.0000,705546.0000,706979.0000,702330.0000,706528.0000,703873.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,soup 
topology,100,1,136686300,1419588.3200,1388181.3700,1470763.6100,200901.2044,130953.5497,285544.8394,"1127597.0000,1412818.0000,2283349.0000,2263662.0000,2275033.0000,2369792.0000,1386087.0000,1457944.0000,1446111.0000,1393751.0000,1418197.0000,1455929.0000,1427486.0000,1485917.0000,1385305.0000,1418398.0000,1456000.0000,1414932.0000,1399523.0000,1395826.0000,1393952.0000,1395415.0000,1421955.0000,1429219.0000,1347794.0000,1425312.0000,1413940.0000,1482951.0000,1422987.0000,1418478.0000,1425562.0000,1447073.0000,1368554.0000,1457752.0000,1396998.0000,1416705.0000,1430371.0000,1414471.0000,1337635.0000,1443396.0000,1397408.0000,1444407.0000,1402107.0000,1432976.0000,1419080.0000,1409702.0000,1387289.0000,1409171.0000,1388632.0000,1394303.0000,1376910.0000,1355980.0000,1451671.0000,1401516.0000,1439889.0000,1393411.0000,1393952.0000,1387640.0000,1402147.0000,1431272.0000,1350289.0000,1399734.0000,1424740.0000,1413879.0000,1398360.0000,1459517.0000,1351632.0000,1365980.0000,1367331.0000,1421183.0000,1430601.0000,1331123.0000,1282902.0000,1295395.0000,1288262.0000,1274235.0000,1317958.0000,1286679.0000,1277922.0000,1273033.0000,1426434.0000,1430261.0000,1394834.0000,1391758.0000,1688862.0000,1480587.0000,1417567.0000,1396787.0000,1454877.0000,1457682.0000,1441572.0000,1354177.0000,1442565.0000,1376289.0000,1177892.0000,1083914.0000,1127937.0000,1106026.0000,1127316.0000,1087100.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,chain 
topology,100,1,49243800,577368.1200,559652.0200,601201.7000,104205.5075,81592.6733,126529.5105,"583464.0000,561143.0000,876260.0000,842727.0000,841484.0000,875098.0000,840742.0000,870579.0000,870970.0000,838889.0000,846454.0000,840983.0000,869968.0000,749941.0000,580830.0000,549160.0000,577974.0000,587252.0000,548067.0000,553958.0000,580318.0000,547777.0000,551244.0000,584337.0000,583795.0000,528891.0000,564048.0000,554860.0000,534111.0000,602661.0000,529452.0000,532398.0000,532698.0000,529051.0000,531296.0000,535344.0000,558086.0000,586892.0000,561042.0000,555502.0000,504244.0000,499796.0000,500458.0000,500848.0000,527048.0000,515696.0000,493164.0000,508953.0000,513442.0000,513613.0000,507951.0000,499496.0000,508773.0000,496300.0000,503713.0000,497752.0000,505798.0000,508242.0000,514714.0000,496841.0000,488054.0000,490128.0000,509354.0000,502762.0000,500377.0000,495538.0000,522529.0000,515787.0000,494416.0000,546996.0000,558557.0000,561283.0000,561333.0000,555241.0000,532658.0000,609884.0000,558838.0000,559990.0000,562184.0000,556794.0000,561152.0000,583444.0000,559148.0000,531386.0000,562435.0000,531196.0000,557235.0000,585628.0000,527358.0000,587733.0000,561483.0000,558587.0000,556694.0000,559660.0000,555712.0000,535393.0000,586360.0000,557596.0000,534221.0000,555100.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,expanding tree 
topology,100,1,67260700,722413.8600,702751.6900,749016.4800,116245.5100,91518.0973,142890.0640,"728219.0000,720364.0000,1103762.0000,1029360.0000,1066390.0000,1064256.0000,1043928.0000,1030322.0000,1041784.0000,1031424.0000,1037155.0000,688374.0000,702160.0000,698443.0000,696459.0000,728400.0000,692882.0000,692792.0000,698754.0000,725043.0000,689145.0000,726266.0000,695517.0000,721356.0000,695477.0000,701248.0000,708131.0000,696659.0000,695868.0000,696118.0000,696039.0000,700046.0000,761182.0000,716207.0000,694856.0000,659619.0000,650101.0000,644440.0000,652917.0000,641575.0000,632949.0000,650151.0000,633520.0000,662414.0000,702600.0000,657916.0000,650542.0000,625946.0000,637668.0000,642757.0000,700597.0000,641815.0000,646885.0000,652365.0000,660550.0000,644760.0000,651313.0000,625615.0000,650071.0000,633961.0000,652345.0000,636175.0000,638860.0000,650582.0000,641765.0000,629702.0000,639371.0000,627789.0000,639481.0000,634642.0000,646284.0000,640383.0000,640292.0000,649721.0000,638018.0000,724091.0000,692231.0000,694756.0000,841584.0000,877192.0000,867102.0000,902600.0000,839761.0000,835162.0000,698894.0000,728981.0000,691249.0000,697551.0000,700016.0000,726987.0000,693303.0000,694365.0000,697301.0000,686701.0000,701579.0000,724933.0000,725855.0000,721487.0000,698233.0000,706629.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,contracting tree 
topology,100,1,67833400,717874.4000,696480.0600,748102.0700,128773.7609,97726.3358,160824.2216,"696970.0000,700517.0000,1116205.0000,1076299.0000,1110364.0000,1081118.0000,1120293.0000,1088623.0000,1103782.0000,1116576.0000,1082832.0000,768636.0000,749290.0000,712910.0000,691099.0000,733379.0000,653388.0000,657735.0000,657596.0000,659980.0000,663556.0000,668205.0000,678245.0000,671422.0000,689315.0000,665961.0000,664449.0000,673255.0000,668707.0000,680248.0000,740032.0000,729542.0000,722479.0000,712930.0000,755621.0000,762845.0000,747926.0000,727918.0000,734230.0000,723169.0000,701488.0000,745062.0000,611799.0000,614434.0000,626957.0000,613382.0000,612220.0000,646915.0000,619584.0000,613001.0000,602781.0000,619443.0000,644410.0000,638279.0000,611578.0000,623721.0000,611448.0000,621778.0000,616558.0000,622729.0000,648418.0000,642146.0000,612230.0000,655952.0000,605075.0000,618130.0000,650341.0000,614062.0000,608943.0000,640844.0000,611438.0000,625725.0000,609204.0000,683785.0000,727618.0000,751403.0000,743999.0000,726456.0000,734482.0000,719302.0000,722428.0000,745862.0000,690417.0000,746784.0000,707049.0000,694125.0000,718862.0000,716076.0000,714493.0000,724633.0000,711718.0000,690398.0000,698203.0000,712349.0000,692312.0000,755030.0000,697672.0000,689937.0000,726687.0000,727638.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,wave_sim 
topology,100,1,219689600,2256870.0500,2219128.4100,2338772.8800,270953.4614,156648.7670,429712.6568,"2134196.0000,2206724.0000,3796268.0000,3808962.0000,3693183.0000,2148123.0000,2172649.0000,2192015.0000,2156057.0000,2241960.0000,2219818.0000,2227994.0000,2205371.0000,2261277.0000,2234736.0000,2246589.0000,2267188.0000,2224367.0000,2233113.0000,2160065.0000,2154895.0000,2150958.0000,2165025.0000,2145708.0000,2218576.0000,2246839.0000,2264573.0000,2289290.0000,2232873.0000,2238283.0000,2217975.0000,2287307.0000,2151479.0000,2149685.0000,2226621.0000,2132292.0000,2152180.0000,2129407.0000,2159033.0000,2136400.0000,2175223.0000,2155536.0000,2224177.0000,2239637.0000,2240657.0000,2269823.0000,2260566.0000,2296343.0000,2269052.0000,2257300.0000,2241349.0000,2259784.0000,2268381.0000,2258602.0000,2270554.0000,2263952.0000,2264082.0000,2265514.0000,2317894.0000,2254083.0000,2219207.0000,2293778.0000,2159844.0000,2160216.0000,2140197.0000,2191705.0000,2134597.0000,2155777.0000,2134977.0000,2124318.0000,2126431.0000,2145508.0000,2177187.0000,2125179.0000,2213826.0000,2211062.0000,2217364.0000,2223926.0000,2325779.0000,2235819.0000,2258352.0000,2301764.0000,2233244.0000,2261918.0000,2253251.0000,2304308.0000,2239486.0000,2262159.0000,2233013.0000,2255986.0000,2266376.0000,2150587.0000,2143294.0000,2137562.0000,2160806.0000,2134627.0000,2183299.0000,2145668.0000,2160025.0000,2152020.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread,jacobi 
topology,100,1,99997100,1071575.4400,1038633.1400,1151534.7900,247317.1202,127476.7871,497961.2578,"1014523.0000,1016045.0000,1670868.0000,1659145.0000,1653875.0000,1650167.0000,1657030.0000,1361259.0000,1016275.0000,1015243.0000,1015134.0000,1019853.0000,1012809.0000,1018790.0000,1016577.0000,1044138.0000,1013591.0000,1020193.0000,1011216.0000,1017207.0000,1018059.0000,1009873.0000,1020403.0000,987671.0000,1016026.0000,981089.0000,1001297.0000,986689.0000,1007409.0000,1012949.0000,1011055.0000,985387.0000,1008110.0000,986800.0000,999474.0000,1016146.0000,1015965.0000,1015534.0000,1016325.0000,1014612.0000,1016827.0000,1016647.0000,1016236.0000,1042165.0000,1019011.0000,1012919.0000,1014753.0000,1017738.0000,1046703.0000,1044499.0000,1016836.0000,1016145.0000,1015915.0000,1017698.0000,1011176.0000,1020223.0000,1014602.0000,3069670.0000,1075608.0000,1015114.0000,1014803.0000,1018961.0000,1044549.0000,1041444.0000,1017698.0000,1015153.0000,1015804.0000,1046883.0000,1009433.0000,981760.0000,1012228.0000,1010494.0000,1009814.0000,1018459.0000,977332.0000,1016145.0000,986570.0000,1009673.0000,1007569.0000,990096.0000,981740.0000,1012689.0000,1009352.0000,1012538.0000,1008681.0000,1013471.0000,982862.0000,1021035.0000,1043007.0000,1014562.0000,1043787.0000,1014221.0000,1048637.0000,1049248.0000,1017338.0000,1012308.0000,1017368.0000,1045481.0000,1012128.0000,1016927.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,soup 
topology,100,1,301104100,2994235.0100,2963014.0000,3021262.4200,147816.9503,129864.1620,161491.6620,"2732781.0000,2749704.0000,2738001.0000,2742269.0000,2737790.0000,2737801.0000,2859852.0000,3090038.0000,3078066.0000,3094137.0000,3088115.0000,3082555.0000,2864631.0000,2736138.0000,2743392.0000,2743051.0000,2751136.0000,2743291.0000,2733493.0000,2736969.0000,2742149.0000,2737790.0000,2739263.0000,2949683.0000,3090600.0000,3085349.0000,3075751.0000,3082946.0000,3080441.0000,3082845.0000,3079779.0000,3082695.0000,3084779.0000,3091361.0000,3070051.0000,3084849.0000,3086131.0000,3085781.0000,3087764.0000,3093916.0000,3080922.0000,3086532.0000,3079398.0000,3080441.0000,3087253.0000,3078247.0000,3089488.0000,3086913.0000,3090330.0000,3089367.0000,3085801.0000,3077436.0000,3080501.0000,3081352.0000,3090460.0000,3083988.0000,3075601.0000,3121027.0000,3085641.0000,3086512.0000,3075542.0000,3081623.0000,3084749.0000,3080310.0000,3088847.0000,3080060.0000,3084277.0000,3083987.0000,3078828.0000,3079578.0000,3089978.0000,3090761.0000,3077445.0000,3079479.0000,3083927.0000,3078326.0000,3084709.0000,3086622.0000,3080611.0000,3088947.0000,3082916.0000,3084999.0000,3087454.0000,3083217.0000,3088887.0000,3091501.0000,3078727.0000,3086202.0000,3083196.0000,3078627.0000,3087343.0000,2895060.0000,2735486.0000,2751115.0000,2739604.0000,2735316.0000,2746838.0000,2734455.0000,2743962.0000,2735647.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,chain 
topology,100,1,66068700,647766.5100,642641.9600,652908.0900,26379.2167,25823.0368,26953.0590,"674298.0000,674116.0000,632598.0000,631365.0000,677483.0000,674677.0000,674057.0000,674477.0000,673275.0000,679506.0000,673676.0000,674748.0000,676021.0000,676401.0000,675128.0000,680710.0000,675509.0000,673616.0000,674598.0000,675720.0000,674127.0000,679827.0000,672945.0000,673956.0000,674678.0000,674638.0000,674467.0000,676792.0000,674768.0000,674287.0000,674948.0000,672974.0000,675149.0000,675249.0000,672834.0000,674458.0000,673866.0000,674277.0000,675490.0000,676200.0000,669668.0000,622268.0000,620495.0000,620535.0000,621868.0000,624152.0000,621066.0000,621026.0000,621587.0000,620445.0000,621687.0000,620484.0000,627528.0000,621637.0000,620535.0000,620676.0000,620545.0000,621457.0000,623841.0000,621337.0000,620725.0000,621037.0000,620485.0000,618581.0000,618171.0000,625144.0000,617800.0000,619944.0000,621567.0000,621357.0000,620435.0000,626857.0000,621917.0000,621827.0000,620485.0000,619724.0000,620755.0000,620866.0000,629552.0000,621938.0000,621738.0000,622629.0000,622399.0000,620645.0000,628110.0000,622909.0000,623551.0000,622449.0000,622028.0000,622479.0000,639270.0000,677614.0000,674427.0000,674938.0000,675840.0000,673074.0000,676892.0000,675309.0000,675649.0000,674788.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,expanding tree 
topology,100,1,74393500,758476.2000,757639.6200,761364.2800,7051.8597,2366.8418,15954.0230,"754910.0000,759148.0000,824201.0000,764117.0000,764228.0000,759148.0000,757445.0000,757004.0000,755381.0000,757625.0000,757545.0000,757475.0000,758126.0000,756513.0000,755892.0000,763546.0000,758186.0000,756263.0000,755080.0000,753588.0000,758496.0000,756082.0000,755651.0000,758045.0000,756943.0000,760941.0000,756313.0000,757735.0000,756082.0000,756103.0000,761442.0000,758086.0000,754810.0000,756292.0000,757274.0000,757074.0000,759920.0000,756803.0000,755982.0000,757725.0000,757595.0000,758947.0000,758337.0000,757615.0000,755301.0000,757766.0000,763276.0000,758557.0000,757174.0000,754048.0000,757064.0000,754540.0000,757615.0000,756132.0000,755531.0000,755642.0000,756402.0000,762855.0000,754429.0000,754840.0000,762575.0000,759178.0000,764879.0000,759168.0000,756744.0000,758186.0000,756403.0000,762654.0000,761713.0000,757625.0000,756032.0000,756653.0000,757485.0000,762053.0000,757345.0000,758136.0000,755632.0000,757305.0000,756984.0000,757875.0000,756353.0000,759088.0000,756132.0000,761763.0000,756132.0000,757565.0000,757164.0000,755301.0000,756903.0000,762435.0000,756884.0000,755541.0000,755621.0000,758537.0000,762103.0000,756744.0000,756553.0000,756212.0000,756734.0000,762344.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,contracting tree 
topology,100,1,79924800,796035.1000,792359.7700,798138.3000,13798.9748,8707.3718,19577.1194,"799054.0000,795888.0000,746745.0000,736645.0000,734872.0000,735233.0000,733880.0000,788213.0000,801078.0000,798823.0000,798904.0000,798703.0000,804995.0000,800516.0000,798031.0000,798252.0000,795987.0000,805476.0000,796780.0000,799875.0000,796709.0000,799595.0000,804294.0000,798142.0000,798703.0000,798273.0000,799274.0000,802981.0000,798222.0000,797070.0000,796940.0000,797090.0000,804714.0000,797521.0000,800186.0000,798483.0000,798041.0000,804934.0000,797311.0000,798352.0000,799374.0000,799604.0000,805316.0000,799395.0000,797360.0000,798313.0000,797761.0000,804654.0000,799334.0000,796589.0000,797861.0000,798403.0000,800356.0000,794956.0000,796058.0000,799875.0000,797130.0000,805907.0000,798593.0000,796730.0000,798452.0000,798342.0000,804484.0000,798352.0000,797781.0000,796989.0000,798843.0000,805355.0000,797630.0000,797069.0000,799033.0000,797160.0000,803011.0000,798412.0000,797250.0000,798473.0000,799504.0000,807109.0000,796318.0000,798754.0000,798773.0000,799094.0000,807339.0000,795957.0000,798613.0000,797140.0000,797651.0000,801768.0000,799745.0000,795367.0000,797471.0000,797841.0000,803162.0000,796379.0000,798903.0000,795928.0000,798273.0000,803091.0000,796409.0000,797931.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,wave_sim 
topology,100,1,419368100,4202760.4500,4172170.8200,4229944.4500,146182.7809,132041.7925,156844.7876,"4299102.0000,4284995.0000,4351611.0000,4314793.0000,4300093.0000,4309491.0000,4301828.0000,4297249.0000,4297388.0000,4310704.0000,4298732.0000,4291497.0000,4294855.0000,4292900.0000,4169707.0000,3983433.0000,3970670.0000,3958395.0000,3967583.0000,3968045.0000,3967463.0000,3964507.0000,4271520.0000,4296668.0000,4294132.0000,4294474.0000,4292279.0000,4076740.0000,3979586.0000,3975689.0000,3966521.0000,3975659.0000,3974496.0000,3967654.0000,4064738.0000,4294303.0000,4300886.0000,4298751.0000,4298781.0000,4292339.0000,4290095.0000,4287420.0000,4296227.0000,4062974.0000,3987521.0000,3995476.0000,3982261.0000,4265798.0000,4295434.0000,4291187.0000,4023610.0000,3986108.0000,3981250.0000,3976901.0000,3981439.0000,3977872.0000,3977212.0000,4302879.0000,4303831.0000,4312007.0000,4359316.0000,4325411.0000,4299193.0000,4300976.0000,4304252.0000,4294614.0000,3987431.0000,3989414.0000,4002550.0000,3984646.0000,3984796.0000,4230742.0000,4303571.0000,4300504.0000,4296967.0000,4310133.0000,4302349.0000,4298200.0000,4306987.0000,4302829.0000,4300114.0000,4305444.0000,4306547.0000,4300705.0000,4294013.0000,4298451.0000,4302779.0000,4303160.0000,4299984.0000,4297840.0000,4424099.0000,4297339.0000,4295776.0000,4294183.0000,4295665.0000,4298962.0000,4304884.0000,4294102.0000,4294033.0000,4295325.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task,jacobi 
topology,100,1,130927200,1314388.6200,1304095.3400,1322691.1900,47234.1436,39604.2291,53912.3582,"1327025.0000,1215754.0000,1344178.0000,1337445.0000,1341442.0000,1336183.0000,1334779.0000,1337384.0000,1335912.0000,1334119.0000,1342945.0000,1337786.0000,1334289.0000,1343977.0000,1335230.0000,1335211.0000,1341222.0000,1335110.0000,1335521.0000,1341142.0000,1332355.0000,1332846.0000,1337976.0000,1334740.0000,1333958.0000,1340280.0000,1332175.0000,1334830.0000,1395876.0000,1336152.0000,1333718.0000,1340340.0000,1337254.0000,1303661.0000,1219010.0000,1222616.0000,1217377.0000,1229059.0000,1215163.0000,1214531.0000,1224270.0000,1216165.0000,1221765.0000,1225543.0000,1220763.0000,1211696.0000,1215723.0000,1220132.0000,1212227.0000,1213119.0000,1218980.0000,1211356.0000,1336974.0000,1334559.0000,1336443.0000,1333758.0000,1340520.0000,1334950.0000,1331864.0000,1343476.0000,1339008.0000,1334219.0000,1344307.0000,1337995.0000,1334740.0000,1341062.0000,1334680.0000,1335531.0000,1341573.0000,1337014.0000,1337485.0000,1337324.0000,1337204.0000,1336904.0000,1338947.0000,1335451.0000,1335792.0000,1341893.0000,1333678.0000,1332505.0000,1335351.0000,1335201.0000,1332986.0000,1338407.0000,1336022.0000,1333297.0000,1340631.0000,1335050.0000,1335491.0000,1335872.0000,1335150.0000,1335622.0000,1340791.0000,1332385.0000,1331564.0000,1343346.0000,1335080.0000,1331694.0000,1337465.0000,1335291.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,soup 
topology,100,1,251872600,2462239.7500,2432565.0400,2489835.5300,145359.3162,128227.9272,167717.9476,"2241920.0000,2259253.0000,2573339.0000,2548622.0000,2563641.0000,2516842.0000,2496233.0000,2499268.0000,2499168.0000,2533603.0000,2486213.0000,2535877.0000,2513274.0000,2477036.0000,2488679.0000,2465936.0000,2896562.0000,2667257.0000,2632430.0000,2633603.0000,2639455.0000,2665374.0000,2632130.0000,2662177.0000,2673699.0000,2616511.0000,2641749.0000,2657228.0000,2543602.0000,2554393.0000,2553050.0000,2558742.0000,2487096.0000,2528894.0000,2326500.0000,2207195.0000,2228505.0000,2232112.0000,2461848.0000,2502124.0000,2496673.0000,2499268.0000,2498046.0000,2460455.0000,2499349.0000,2557659.0000,2548391.0000,2596253.0000,2575242.0000,2522312.0000,2539495.0000,2519306.0000,2466867.0000,2530067.0000,2519607.0000,2371927.0000,2207215.0000,2235207.0000,2229446.0000,2212434.0000,2234145.0000,2242392.0000,2214869.0000,2209028.0000,2210040.0000,2184451.0000,2239887.0000,2249475.0000,2211573.0000,2327182.0000,2494960.0000,2496954.0000,2525328.0000,2503918.0000,2491604.0000,2527211.0000,2495140.0000,2524987.0000,2502054.0000,2523184.0000,2495943.0000,2520829.0000,2533833.0000,2486494.0000,2505360.0000,2495631.0000,2488538.0000,2504669.0000,2490712.0000,2498086.0000,2504608.0000,2552069.0000,2496603.0000,2527852.0000,2498908.0000,2378950.0000,2190743.0000,2231010.0000,2208026.0000,2220370.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,chain 
topology,100,1,56843400,589295.6000,577031.1000,604980.8300,70639.2984,56914.6608,84931.8117,"608071.0000,581622.0000,763065.0000,749860.0000,768916.0000,749490.0000,806498.0000,773415.0000,744279.0000,750461.0000,771802.0000,784496.0000,767604.0000,744630.0000,589356.0000,537988.0000,564649.0000,553407.0000,529492.0000,548157.0000,547516.0000,550763.0000,505447.0000,506899.0000,514123.0000,508653.0000,531797.0000,509594.0000,529793.0000,521738.0000,511208.0000,531516.0000,532307.0000,507220.0000,507771.0000,511007.0000,529703.0000,509474.0000,572324.0000,558137.0000,584527.0000,552526.0000,557735.0000,558587.0000,551975.0000,580920.0000,586531.0000,557045.0000,584737.0000,586540.0000,558827.0000,584396.0000,561172.0000,556323.0000,586391.0000,556594.0000,585800.0000,586320.0000,586200.0000,639200.0000,582082.0000,579587.0000,579257.0000,583966.0000,578015.0000,576792.0000,554159.0000,580399.0000,577814.0000,585177.0000,572003.0000,585569.0000,584837.0000,641044.0000,559519.0000,585047.0000,557305.0000,561754.0000,582292.0000,563497.0000,553298.0000,588564.0000,581631.0000,560972.0000,584397.0000,586611.0000,559359.0000,605757.0000,610907.0000,608723.0000,580459.0000,581000.0000,576772.0000,582313.0000,581341.0000,577593.0000,581501.0000,610486.0000,580398.0000,580769.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,expanding tree 
topology,100,1,67649600,737075.6700,718350.5900,762737.9900,111095.3393,86556.9715,138125.3570,"751283.0000,698953.0000,1106427.0000,1067854.0000,1054799.0000,1083133.0000,1072382.0000,1037686.0000,1061441.0000,1035983.0000,946473.0000,720835.0000,729872.0000,749129.0000,698262.0000,728820.0000,755521.0000,725003.0000,789356.0000,682873.0000,660140.0000,664308.0000,637948.0000,656343.0000,673586.0000,663396.0000,667393.0000,664508.0000,659609.0000,668365.0000,656443.0000,697821.0000,672724.0000,641285.0000,660821.0000,668917.0000,705566.0000,721677.0000,738228.0000,694355.0000,722599.0000,710766.0000,698964.0000,742346.0000,738629.0000,689075.0000,690718.0000,716236.0000,717739.0000,693874.0000,717850.0000,703743.0000,711297.0000,726125.0000,721447.0000,693133.0000,849399.0000,840933.0000,865289.0000,651985.0000,651965.0000,661964.0000,688985.0000,660642.0000,694445.0000,666963.0000,672604.0000,662665.0000,664118.0000,641996.0000,684877.0000,656153.0000,675339.0000,664368.0000,669198.0000,668856.0000,685909.0000,646584.0000,662405.0000,660210.0000,651163.0000,655612.0000,755772.0000,710506.0000,723240.0000,710836.0000,720625.0000,719082.0000,745192.0000,714934.0000,716167.0000,725995.0000,717038.0000,773024.0000,744551.0000,702571.0000,844279.0000,872763.0000,838669.0000,753637.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,contracting tree 
topology,100,1,70902500,755668.8900,736902.0400,781925.4600,111778.1916,84854.1948,140276.7390,"655291.0000,667614.0000,1131123.0000,1081369.0000,1069496.0000,1070258.0000,1084055.0000,1129821.0000,1080898.0000,1073905.0000,1073725.0000,778134.0000,742536.0000,760180.0000,747135.0000,734812.0000,759980.0000,745181.0000,786800.0000,782612.0000,779367.0000,726195.0000,727327.0000,762354.0000,771181.0000,730553.0000,757685.0000,755521.0000,777342.0000,726325.0000,726876.0000,730133.0000,782532.0000,750883.0000,731325.0000,739009.0000,729050.0000,724332.0000,724392.0000,725484.0000,756683.0000,724502.0000,726626.0000,734902.0000,786901.0000,727248.0000,722749.0000,753297.0000,755591.0000,777313.0000,723360.0000,725364.0000,724181.0000,764207.0000,821507.0000,749820.0000,747566.0000,733018.0000,725844.0000,721035.0000,724772.0000,755992.0000,788393.0000,722278.0000,726967.0000,785649.0000,747496.0000,762555.0000,751584.0000,723551.0000,758567.0000,721637.0000,677823.0000,689186.0000,670309.0000,664439.0000,726927.0000,662975.0000,678765.0000,667575.0000,656142.0000,687772.0000,669869.0000,682423.0000,678064.0000,666001.0000,681902.0000,685147.0000,692521.0000,668897.0000,674688.0000,656844.0000,659139.0000,662855.0000,671352.0000,671572.0000,671652.0000,673104.0000,661462.0000,653538.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,wave_sim 
topology,100,1,325825300,3155609.8000,3128654.6600,3195157.2900,164178.5577,116381.3807,248759.7182,"3163258.0000,3161305.0000,3194006.0000,3161675.0000,3173217.0000,3160733.0000,3198465.0000,3136066.0000,3192062.0000,3141807.0000,3155523.0000,3124324.0000,3171373.0000,3146376.0000,3220866.0000,3161584.0000,3192803.0000,2942719.0000,2946738.0000,2925236.0000,2926449.0000,2939082.0000,2944172.0000,2901933.0000,2921289.0000,3044673.0000,3189908.0000,3255021.0000,3191601.0000,3193174.0000,3193375.0000,3192192.0000,3192603.0000,3192163.0000,3195799.0000,3170180.0000,3156254.0000,3172726.0000,3007773.0000,2940997.0000,2911691.0000,2920958.0000,2898195.0000,2940255.0000,2933863.0000,3880298.0000,4082812.0000,3242718.0000,3181662.0000,3169390.0000,3214456.0000,3103274.0000,3117351.0000,3165161.0000,3167786.0000,3185851.0000,3162516.0000,3142007.0000,3137258.0000,3187112.0000,3169550.0000,3165863.0000,3150674.0000,3138641.0000,3202702.0000,3160794.0000,3195660.0000,3134042.0000,3169729.0000,3160702.0000,3133331.0000,3354109.0000,3458788.0000,3404165.0000,3461423.0000,3407421.0000,3176954.0000,3195349.0000,3180420.0000,3168117.0000,3213643.0000,3203816.0000,3116459.0000,3176904.0000,3210988.0000,3166814.0000,2932421.0000,2938000.0000,3001450.0000,3150834.0000,3169389.0000,3164831.0000,3166604.0000,3167024.0000,3143119.0000,3139643.0000,3166674.0000,3094238.0000,3139322.0000,3174259.0000" +building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task,jacobi 
topology,100,1,104477600,1059364.7600,1036986.8400,1095800.8100,142363.1364,93035.6284,193655.4235,"964127.0000,969747.0000,1637524.0000,1604371.0000,1596667.0000,1594883.0000,1595615.0000,1575276.0000,971040.0000,987781.0000,970619.0000,978493.0000,990336.0000,979376.0000,958757.0000,981109.0000,1000806.0000,970229.0000,970198.0000,1032616.0000,1050119.0000,1043928.0000,1017618.0000,1047023.0000,1073043.0000,1074316.0000,1044950.0000,1043968.0000,1038758.0000,1046703.0000,1043578.0000,1045922.0000,1081760.0000,1046433.0000,1040312.0000,1046393.0000,1072362.0000,1077622.0000,1074676.0000,1043047.0000,1013140.0000,1046082.0000,1077352.0000,1043658.0000,1043517.0000,1018540.0000,1102129.0000,1016847.0000,1037757.0000,1049359.0000,1071130.0000,1076790.0000,1073053.0000,1108781.0000,1041934.0000,1046052.0000,1045150.0000,1041263.0000,1077141.0000,1045000.0000,1043777.0000,1075487.0000,1046403.0000,1068865.0000,1047154.0000,1046283.0000,1014012.0000,1017548.0000,1043717.0000,1075117.0000,1041414.0000,1018560.0000,1071791.0000,1045602.0000,1070949.0000,1047735.0000,1045692.0000,1042866.0000,1044980.0000,1049138.0000,1044539.0000,970799.0000,971802.0000,970098.0000,970629.0000,1000656.0000,985577.0000,974006.0000,960720.0000,965319.0000,977803.0000,955570.0000,970489.0000,969457.0000,958806.0000,968515.0000,982281.0000,960510.0000,987682.0000,977352.0000" +normalizing randomized box sets - 2d,"small, 
native",100,34,2437800,636.5997,635.9482,638.5209,5.1944,2.0246,11.1469,"636.1471,637.0588,682.1176,652.3529,649.7059,638.5000,635.5588,634.3824,636.1471,634.6765,635.2647,635.5588,635.2647,634.6765,633.5000,634.9706,635.8529,635.5588,634.6765,634.6765,635.8529,635.2647,636.1471,634.0882,634.9706,634.9706,635.5588,635.5588,634.3824,637.0294,633.2059,633.7941,634.9706,635.2647,635.5588,643.2353,634.9706,634.9706,635.5588,636.4412,635.5588,636.4412,636.4412,636.1471,636.7353,634.9706,636.4412,636.4412,637.3235,636.1765,634.9706,637.0294,635.8529,636.4412,634.6765,635.5588,636.1471,636.1471,634.9706,634.9706,637.3235,636.7353,634.6765,634.6765,635.2647,637.0294,636.1471,634.3824,637.9412,636.1471,635.8529,634.6765,635.5588,635.5588,635.8529,637.3235,635.2647,635.5588,635.8529,635.5588,635.2647,635.5588,636.7353,637.3235,636.4412,634.0882,636.4412,637.0294,637.0294,636.4412,636.4412,637.0294,637.0294,636.4412,634.9706,635.5588,636.4412,635.2647,635.5588,637.6176" +normalizing randomized box sets - 2d,"small, embedded in 
3d",100,31,2486200,732.6913,730.5729,741.7765,18.9831,1.6875,44.4461,"731.9677,731.6452,768.1613,731.9677,731.3226,730.6774,727.7742,730.3548,730.0323,729.0645,729.7097,730.0323,729.3871,727.7742,730.6774,729.0645,729.7097,729.7097,727.7742,729.7097,731.0000,729.0645,729.0645,728.7419,729.0645,728.4194,729.7097,731.3226,729.3871,728.7419,730.0323,729.3871,729.7097,730.3548,728.7419,728.7419,729.7097,729.3871,729.3871,729.0645,728.7419,728.7419,729.0645,729.3871,729.3871,728.7419,727.0968,728.0645,730.0323,730.0000,730.0323,729.0323,727.4194,728.7419,727.7742,729.0645,729.3871,728.0968,728.7419,729.7097,729.3871,917.1613,732.9355,731.6452,732.9677,731.9677,732.2903,731.6452,733.5806,730.6774,732.6129,731.9677,732.6129,733.2581,730.6774,731.3226,731.6452,732.6452,731.3226,731.9677,731.6452,732.2903,732.6129,731.9677,732.3226,731.6452,731.6452,733.2581,732.6129,732.2903,732.3226,730.6774,733.2581,732.2903,731.6452,731.9677,732.2903,731.0000,732.6452,731.6452" +normalizing randomized box sets - 2d,"medium, 
native",100,4,3024800,6953.2050,6884.9100,7038.6400,387.6856,332.2777,473.9577,"6742.2500,6719.7500,8390.5000,7566.5000,7583.7500,7576.5000,7611.5000,7566.2500,7549.0000,7538.7500,7526.2500,7538.7500,7556.2500,7536.2500,7516.2500,7561.5000,7528.7500,7626.5000,7553.7500,7519.0000,7506.2500,7523.7500,7496.2500,7536.2500,7556.5000,8177.5000,6890.0000,7298.5000,6817.2500,6780.0000,6727.2500,6719.7500,6727.2500,6709.7500,6724.7500,6757.2500,6737.2500,6729.7500,6742.2500,6745.0000,6699.7500,6724.7500,6692.2500,6729.7500,6709.7500,6729.7500,6709.7500,6754.7500,6722.2500,6742.5000,6762.2500,6742.2500,6747.5000,6722.2500,6742.2500,6722.2500,6722.2500,6722.2500,6732.2500,6752.2500,6717.2500,6742.2500,6742.2500,6722.2500,6727.2500,6734.7500,6784.7500,6747.5000,6697.2500,6739.7500,6719.7500,6717.2500,6734.7500,6742.2500,6717.2500,6735.0000,6724.7500,6724.7500,6779.7500,6730.0000,6782.2500,6767.5000,6762.2500,6772.2500,6757.5000,6722.2500,6739.7500,6717.2500,6719.7500,6742.5000,6734.7500,6722.2500,6719.7500,6769.7500,6737.5000,6724.7500,6732.2500,6722.2500,6767.2500,6732.5000" +normalizing randomized box sets - 2d,"medium, embedded in 
3d",100,3,2428800,7390.8933,7352.6900,7488.3767,277.6676,40.5469,535.7590,"7366.6667,7309.6667,9610.6667,7547.0000,7407.0000,7413.3333,7356.6667,7393.3333,7340.0000,7380.0000,7373.3333,7340.0000,7366.6667,7366.6667,7353.3333,7376.6667,7366.6667,7366.6667,7390.0000,7383.3333,7343.3333,7360.0000,7363.3333,7350.0000,7356.6667,7373.3333,7353.3333,7356.6667,7363.3333,7363.3333,7366.6667,7353.3333,7363.3333,7330.0000,7366.6667,7373.3333,7323.3333,7333.0000,7309.6667,7350.0000,7326.6667,7363.3333,7356.6667,7356.6667,7373.3333,7310.0000,7346.3333,7349.6667,7360.0000,7350.0000,8969.6667,7577.0000,7343.3333,7363.3333,7340.0000,7333.0000,7309.6667,7340.0000,7320.0000,7323.3333,7346.3333,7299.6667,7330.0000,7313.3333,7319.6667,7323.3333,7330.0000,7353.3333,7330.0000,7323.3333,7343.0000,7296.6667,7333.3333,7323.3333,7313.0000,7353.3333,7340.0000,7370.0000,7350.0000,7346.6667,7306.6667,7323.0000,7343.0000,7323.3333,7330.0000,7330.0000,7350.0000,7360.0000,7366.6667,7376.6667,7383.3333,7363.3333,7333.3333,7356.6667,7333.0000,7329.6667,7333.3333,7333.3333,7380.0000,7323.3333" +normalizing randomized box sets - 2d,"large, 
native",100,1,26353800,267138.9800,266959.8600,267391.3000,1081.5554,813.2716,1379.1906,"266393.0000,266103.0000,270371.0000,267926.0000,267566.0000,266914.0000,266484.0000,270771.0000,266192.0000,267114.0000,267425.0000,267164.0000,266564.0000,266624.0000,266563.0000,266142.0000,266274.0000,267415.0000,266944.0000,266293.0000,266003.0000,266253.0000,270511.0000,266374.0000,266764.0000,266604.0000,267225.0000,266624.0000,266623.0000,266905.0000,266513.0000,267045.0000,266534.0000,266884.0000,267445.0000,266673.0000,266473.0000,270501.0000,267486.0000,266734.0000,266844.0000,267455.0000,267846.0000,266334.0000,267205.0000,266333.0000,266884.0000,267055.0000,266453.0000,266333.0000,267305.0000,266864.0000,270351.0000,266473.0000,265832.0000,267375.0000,266794.0000,267525.0000,266604.0000,266634.0000,267525.0000,266464.0000,266664.0000,267716.0000,267044.0000,267165.0000,267916.0000,270541.0000,266283.0000,266764.0000,267245.0000,267054.0000,266604.0000,266604.0000,266543.0000,266844.0000,267205.0000,266623.0000,266805.0000,266663.0000,267174.0000,266764.0000,271163.0000,267245.0000,266764.0000,266934.0000,266945.0000,266493.0000,266684.0000,266644.0000,266584.0000,267325.0000,266834.0000,266954.0000,266935.0000,267084.0000,266704.0000,269750.0000,267666.0000,267065.0000" +normalizing randomized box sets - 2d,"large, embedded in 
3d",100,1,28055000,275672.2100,274282.5600,277137.7700,7292.9348,6900.4886,7756.0172,"268808.0000,270361.0000,290058.0000,285380.0000,283465.0000,283867.0000,283466.0000,283756.0000,283706.0000,283836.0000,288755.0000,284067.0000,283346.0000,284237.0000,283526.0000,282874.0000,283225.0000,283055.0000,283025.0000,283275.0000,284107.0000,284267.0000,283797.0000,283425.0000,288455.0000,283495.0000,283346.0000,283766.0000,283526.0000,283125.0000,284067.0000,283726.0000,284207.0000,284288.0000,284026.0000,283736.0000,283275.0000,283045.0000,287764.0000,283947.0000,283766.0000,288095.0000,281131.0000,270341.0000,269239.0000,269359.0000,269519.0000,269569.0000,269860.0000,269119.0000,269519.0000,269559.0000,275631.0000,269780.0000,269429.0000,269309.0000,269479.0000,268989.0000,269178.0000,269689.0000,269369.0000,269760.0000,270181.0000,270140.0000,270111.0000,268627.0000,269510.0000,275220.0000,269469.0000,269359.0000,269219.0000,269078.0000,269630.0000,269970.0000,268647.0000,268999.0000,269940.0000,269860.0000,268888.0000,269719.0000,269069.0000,268497.0000,273016.0000,268958.0000,269209.0000,269058.0000,268798.0000,269239.0000,270060.0000,268929.0000,268968.0000,268537.0000,269179.0000,269449.0000,269289.0000,268968.0000,269990.0000,273196.0000,269759.0000,269319.0000" +normalizing randomized box sets - 3d,small - 
native,100,8,2456000,3168.2850,3159.2663,3191.3488,66.1072,12.6765,123.9383,"3159.5000,3179.5000,3559.0000,3195.7500,3179.6250,3188.2500,3179.5000,3173.2500,3183.2500,3182.1250,3183.2500,3197.1250,3155.7500,3177.0000,3180.7500,3167.0000,3163.3750,3159.5000,3168.2500,3167.0000,3164.5000,3158.2500,3163.2500,3160.7500,3164.5000,3140.7500,3158.2500,3150.7500,3147.0000,3140.7500,3164.5000,3154.5000,3144.5000,3138.1250,3158.1250,3149.3750,3149.5000,3139.5000,3147.0000,3153.2500,3148.2500,3163.2500,3143.2500,3148.1250,3155.6250,3155.6250,3140.6250,3168.2500,3165.7500,3157.0000,3157.0000,3152.0000,3164.5000,3155.7500,3169.5000,3158.2500,3157.0000,3152.0000,3157.0000,3157.0000,3152.0000,3144.5000,3136.8750,3142.0000,3152.0000,3155.7500,3157.0000,3164.5000,3149.5000,3148.2500,3163.2500,3157.0000,3155.7500,3150.7500,3172.0000,3173.3750,3157.0000,3164.5000,3144.3750,3145.6250,3142.0000,3160.7500,3157.0000,3158.2500,3159.5000,3140.7500,3163.2500,3150.7500,3159.5000,3165.7500,3164.5000,3158.2500,3679.1250,3148.2500,3145.7500,3164.5000,3170.7500,3162.0000,3165.7500,3154.5000" +normalizing randomized box sets - 3d,medium - 
native,100,3,3599400,11216.2233,11184.3467,11362.9933,298.8722,31.5574,709.5565,"11197.3333,11160.3333,14169.6667,11371.0000,11274.0000,11177.3333,11207.3333,11157.0000,11200.6667,11200.6667,11190.6667,11163.6667,11167.3333,11163.6667,11150.6667,11160.6667,11170.3333,11164.0000,11180.6667,11183.6667,11200.6667,11190.6667,11170.6667,11217.3333,11193.6667,11184.0000,11187.3333,11184.0000,11163.6667,11197.3333,11204.0000,11184.0000,11173.6667,11217.3333,11167.3333,11163.6667,11200.3333,11160.6667,11257.3333,11217.3333,11234.0000,11197.3333,11160.3333,11183.6667,11180.6667,11167.3333,11177.0000,11134.0000,11240.6667,11153.6667,11147.3333,11133.6667,11204.0000,11144.0000,11187.0000,11120.3333,11170.6667,11170.3333,11144.0000,11130.3333,11154.0000,11204.0000,11130.3333,11174.0000,11167.0000,11174.0000,11274.0000,11194.0000,11154.0000,11163.6667,11164.0000,11207.3333,11173.6667,11230.6667,11177.3333,11200.6667,11217.3333,11197.3333,11210.6667,11180.3333,11247.3333,11194.0000,11200.6667,11234.0000,11150.6667,11230.6667,11207.0000,11207.0000,11187.3333,11177.3333,11187.3333,11173.6667,11210.6667,11160.6667,11183.6667,11197.0000,11130.6667,11204.0000,11193.6667,11170.6667" +normalizing randomized box sets - 3d,large - 
native,100,1,304345900,3057791.5200,3038193.0800,3072666.1800,86470.8205,69202.9034,102695.1706,"3104225.0000,3095970.0000,3104086.0000,3100919.0000,3086683.0000,3097402.0000,3095810.0000,3092844.0000,3096200.0000,3096380.0000,3151025.0000,3095870.0000,3088626.0000,3094277.0000,3088877.0000,3096571.0000,3100599.0000,3086753.0000,3093244.0000,3103524.0000,3085520.0000,3099688.0000,3099266.0000,3097804.0000,3101300.0000,3104586.0000,3093385.0000,3091531.0000,3094848.0000,3101340.0000,3090239.0000,3094998.0000,3092433.0000,3092583.0000,3089658.0000,3083236.0000,3093606.0000,3090369.0000,3089809.0000,3099917.0000,3091401.0000,3100709.0000,3090079.0000,3098265.0000,3092473.0000,3089788.0000,3092564.0000,3100889.0000,3100008.0000,3098675.0000,3088887.0000,3099978.0000,3096561.0000,3094888.0000,3100339.0000,3097903.0000,3097613.0000,3090460.0000,3083136.0000,3104195.0000,3097714.0000,3101550.0000,3092764.0000,3091572.0000,3100208.0000,3092124.0000,3089658.0000,3011299.0000,2851266.0000,2859602.0000,2833382.0000,2848451.0000,2857258.0000,2852849.0000,2848009.0000,2844393.0000,2859862.0000,2852698.0000,2849162.0000,2895280.0000,3100479.0000,3098855.0000,3076833.0000,2999107.0000,2828011.0000,2820398.0000,2952518.0000,3094077.0000,3088396.0000,3097162.0000,3091301.0000,3093425.0000,3100118.0000,3085180.0000,3088146.0000,3095629.0000,3098856.0000,3090600.0000,3096330.0000,3089818.0000" +normalizing a fully mergeable tiling of boxes - 1,"small, 
native",100,655,2358000,36.9922,36.9452,37.0660,0.2936,0.2070,0.5123,"36.8153,37.0443,38.9710,36.8611,36.8305,36.8153,36.9069,36.7695,37.3344,37.0748,37.4870,36.8916,36.9832,37.2427,36.9359,36.7069,36.6015,37.1206,37.4275,37.1053,36.9374,36.8000,37.1969,37.0137,37.2733,36.7527,36.9069,36.7237,36.8916,37.1969,36.9985,37.2901,37.3344,36.8000,37.3511,37.1053,36.9374,37.1817,37.2733,37.0901,36.9679,36.2794,36.7695,36.9374,36.8916,36.8916,36.8137,37.0916,36.9069,37.0901,36.6916,36.8290,36.8611,36.9221,36.9069,36.7695,36.9679,36.8305,37.0137,36.9374,36.9221,36.9832,36.7542,36.9679,36.7084,36.9527,36.9527,36.8901,36.9527,36.8443,36.9374,36.8000,36.9832,36.8153,36.9985,36.5405,36.5542,37.4275,37.1664,37.3053,36.6458,36.8916,37.0901,37.1985,37.2733,37.2137,36.6153,36.9221,37.2595,36.5542,36.9985,37.1664,37.1069,37.2275,36.9985,37.3053,36.9053,36.9374,36.9679,37.1359" +normalizing a fully mergeable tiling of boxes - 1,"small, embedded in 3d",100,437,2403500,55.8991,55.8127,56.1242,0.6643,0.3259,1.4089,"55.6407,55.9588,56.4645,55.9611,55.9130,55.6842,55.9817,56.1899,55.6842,55.4554,56.0297,56.2563,55.7780,55.9588,55.9840,55.6384,55.4554,55.9382,55.5927,55.9359,56.3272,56.1899,56.1648,56.4416,56.1213,55.9130,55.9611,55.9817,56.0755,56.1899,56.3959,56.1876,55.9611,55.9130,56.1442,56.2128,56.0046,56.1899,56.3959,56.0046,55.9382,56.0961,56.4188,56.4188,55.6842,55.8696,56.2792,56.1190,56.0297,56.3249,55.4783,61.5995,55.9817,56.0984,56.1899,56.1648,56.1899,55.9817,55.5698,55.3410,55.4325,55.4783,55.5698,55.5240,54.9977,55.9588,56.1670,56.1899,55.9588,55.6156,55.9611,55.5698,55.5698,55.5011,55.7323,56.0961,55.8924,55.5927,55.4096,55.8215,55.8696,55.4783,55.0870,55.4096,55.4783,55.3867,55.4096,55.3410,55.1327,55.9817,55.5469,55.4096,55.3181,55.4096,55.3867,55.8467,55.6613,55.5240,55.5927,55.5469" +normalizing a fully mergeable tiling of boxes - 1,"medium, 
native",100,69,2408100,304.9213,304.7587,305.1190,0.9131,0.7597,1.2093,"304.0290,305.1884,308.9710,305.4783,305.0435,304.3188,305.0435,304.3188,304.8986,304.3188,304.8986,304.3188,304.3188,305.3333,304.1739,305.4783,304.0290,305.4783,304.1739,305.4783,304.1739,305.4783,304.1739,305.4783,304.0290,306.0580,304.6087,306.0580,304.4638,305.9130,304.4638,306.0580,304.6087,306.0580,305.9130,307.3768,306.0580,307.5072,306.0580,307.5217,305.9130,305.3333,303.8841,305.3333,304.1739,305.3333,303.8841,305.1884,304.1739,305.3333,303.8696,305.3333,304.1594,305.1884,304.0145,305.3333,304.1739,305.3333,303.8841,305.1884,304.1739,305.1884,303.8841,305.3333,304.1739,305.3333,304.0290,305.1884,304.1739,305.1884,303.8841,305.3333,304.1739,305.3333,303.8841,305.1884,304.1739,305.1884,303.8841,305.3333,304.1739,305.3333,303.8841,305.1884,304.1739,305.1884,303.8841,305.3333,304.1594,305.3333,303.8696,305.1884,304.0145,305.1884,303.8841,305.3333,304.1739,305.3333,303.8841,305.1884" +normalizing a fully mergeable tiling of boxes - 1,"medium, embedded in 
3d",100,52,2418000,500.5977,500.1190,501.9658,3.8285,1.6584,8.2957,"502.8269,500.5385,534.6346,500.1346,501.6923,498.0192,498.2115,500.7308,501.2885,498.7885,496.6731,501.6731,501.6731,499.1731,500.5385,501.0962,500.1538,497.2500,499.7500,499.7692,496.0962,496.6731,496.6731,499.9423,501.0962,501.1154,500.1346,501.5000,501.6731,498.7885,501.6731,499.3654,501.8846,500.5192,499.5577,498.9808,498.5962,500.1346,503.4231,499.3654,498.9808,502.6538,501.1154,502.4423,502.8462,502.2692,501.0962,499.1923,500.5192,501.6923,500.7115,498.9808,498.4231,497.8269,498.4038,501.1154,501.4808,499.3846,500.7115,496.2885,498.7885,501.6731,498.4038,499.0000,501.2885,503.8077,501.8846,499.7500,499.9423,500.7115,499.1731,501.6923,500.7115,499.7500,502.4615,501.2885,501.3077,500.9038,498.7885,498.9808,498.2115,497.4423,501.5000,499.9423,500.7308,502.6346,501.8846,499.7500,497.8462,500.9038,502.8462,502.6538,503.5962,502.4615,501.5000,500.3269,501.1154,500.1346,499.1731,496.8654" +normalizing a fully mergeable tiling of boxes - 1,"large, 
native",100,3,2608500,8995.5467,8983.1800,9044.6300,109.1132,22.7425,252.8114,"8936.3333,9003.3333,9167.0000,8963.0000,8976.3333,8963.0000,8959.6667,8963.0000,8990.0000,9006.3333,8986.3333,8976.3333,8966.3333,8989.6667,9003.0000,8996.3333,8990.0000,8993.0000,8993.0000,8989.6667,10045.0000,8983.0000,8976.3333,8963.0000,8973.0000,8976.3333,8959.6667,8943.0000,8949.6667,8993.0000,8983.0000,8976.6667,8973.0000,8993.0000,8983.0000,8973.0000,8996.6667,8953.0000,8999.6667,8979.6667,8993.3333,8969.6667,8946.3333,8963.0000,8963.0000,8996.3333,8980.0000,8969.6667,8993.0000,8979.6667,8976.3333,8956.3333,9003.3333,8989.6667,8993.0000,8946.3333,8936.3333,8986.6667,9006.3333,9016.3333,9026.6667,8946.3333,9013.0000,8986.3333,9023.0000,8979.6667,8989.6667,9000.0000,9006.3333,8999.6667,8976.3333,8949.6667,8983.0000,9013.0000,9026.6667,8983.0000,8973.0000,8989.6667,8976.3333,9016.3333,8973.0000,8959.6667,9003.3333,8976.3333,8993.0000,8936.3333,8979.6667,9006.6667,9016.3333,9026.6667,8989.6667,9006.3333,9020.0000,9006.3333,8976.3333,8983.0000,8999.6667,8969.6667,8949.6667,8976.3333" +normalizing a fully mergeable tiling of boxes - 1,"large, embedded in 
3d",100,3,3560400,12190.2100,12174.3633,12249.3033,139.6125,31.2166,324.9354,"12142.6667,12195.6667,12386.3333,12175.6667,12132.3333,12189.3333,12199.0000,12162.6667,12159.0000,12226.0000,12155.6667,12179.0000,12152.6667,12232.3333,12159.3333,12162.3333,12142.3333,12179.3333,12165.6667,12182.3333,12166.0000,12189.0000,12149.0000,12159.3333,12205.6667,12189.3333,12189.0000,12145.6667,12186.0000,12169.0000,12212.6667,12132.3333,12202.3333,12135.6667,12169.0000,12175.6667,12289.3333,12186.0000,12169.0000,12166.0000,12195.6667,12162.3333,12172.6667,12205.6667,12209.3333,12132.3333,12189.3333,12205.6667,12196.0000,12179.0000,12209.3333,12192.3333,12199.3333,12215.6667,13535.0000,12146.0000,12119.0000,12155.6667,12185.6667,12172.3333,12159.0000,12139.0000,12196.0000,12219.0000,12212.3333,12135.6667,12192.6667,12139.0000,12149.0000,12119.0000,12189.3333,12165.6667,12165.6667,12165.6667,12199.0000,12156.0000,12175.6667,12145.6667,12169.3333,12152.3333,12152.3333,12186.0000,12199.0000,12142.3333,12122.3333,12202.3333,12145.6667,12182.6667,12202.3333,12196.0000,12172.3333,12176.0000,12152.3333,12199.0000,12142.6667,12172.3333,12175.6667,12216.0000,12145.6667,12149.0000" +normalizing a fully mergeable tiling of boxes - 2,"small, 
native",100,228,2394000,116.7842,116.6366,117.3363,1.2954,0.3031,2.9994,"116.9254,116.3947,118.8553,117.0570,116.9649,116.5307,116.6140,117.0088,116.8816,116.5263,116.8816,116.8816,116.9211,115.9605,116.4386,116.8377,117.0965,116.5702,116.5307,116.4825,116.3070,116.1360,116.4386,116.5746,116.8333,115.9605,117.2719,116.5702,116.5307,116.2193,116.4825,116.6184,116.6140,116.7456,116.9254,116.8333,117.1009,115.9561,116.5746,116.2632,116.9693,116.5702,116.7456,116.9254,116.8772,116.6623,116.5746,116.7895,116.5746,116.0439,116.7500,117.1404,116.6184,116.4386,116.7061,116.7018,116.6140,116.8816,117.0965,116.6623,116.4386,116.7018,116.3509,116.6140,116.7500,116.7456,116.7939,116.7895,116.8333,116.7061,116.6579,116.5307,116.9211,116.7500,116.0000,116.4868,129.1842,116.5263,116.4868,116.8333,116.8333,116.6184,116.4825,116.3553,116.3070,117.0570,117.1404,116.0439,116.4825,116.7500,116.6579,116.1798,116.4386,116.7061,116.7895,116.5702,116.6623,116.3947,116.5746,116.6140" +normalizing a fully mergeable tiling of boxes - 2,"small, embedded in 
3d",100,198,2415600,127.3295,127.1025,128.1411,1.9645,0.5608,4.5054,"126.8485,127.3535,130.0354,127.7576,127.8586,127.7071,126.1869,126.7424,126.6970,126.6970,126.5909,126.7980,126.9495,127.3030,127.8081,126.8990,127.5556,127.9091,127.3535,127.0505,127.7576,127.3030,126.7929,126.6970,127.1515,126.9495,127.1465,127.6061,126.7929,126.1414,127.7576,127.6061,145.9242,127.1515,127.0960,127.0960,127.6566,126.9495,126.6465,126.7475,126.5909,126.5960,126.4899,126.7929,127.8081,127.1010,127.9091,127.5556,126.9495,126.6465,127.0000,127.6061,126.2374,127.4040,127.6566,127.6566,127.1515,126.6970,126.8990,127.5556,125.8838,126.6465,127.5051,127.6061,127.5556,126.9444,126.8434,127.3535,125.9899,127.6566,127.7071,127.7071,127.6566,127.3535,126.5909,125.9343,126.4899,127.6061,127.6566,127.7071,127.8586,127.6566,127.5556,126.4899,126.6414,127.5051,127.6061,127.7071,127.6566,127.7071,127.5556,126.0354,126.5960,126.4394,126.7475,126.6465,126.6414,126.5960,125.9848,127.5556" +normalizing a fully mergeable tiling of boxes - 2,"medium, 
native",100,26,2423200,928.8112,927.2100,932.9708,12.3394,5.8484,26.1780,"930.9231,927.0769,955.9615,932.4615,927.8462,927.0769,925.9231,928.6154,918.9615,926.6923,927.0769,927.8462,920.8846,920.1538,922.0385,924.0000,925.1538,929.0000,929.3846,914.7308,929.0000,928.2308,929.3846,927.0385,919.0000,922.8077,927.4615,929.7692,930.1538,929.7692,927.0769,913.1923,923.2308,917.4231,924.0000,930.9231,927.0769,923.5769,927.8462,935.1538,934.8077,932.8462,934.3846,915.5000,919.7692,932.8462,934.0000,931.6923,932.0769,912.4231,929.0000,935.5385,933.6538,932.4615,933.2308,925.9231,936.3077,933.2308,930.5385,922.0769,1034.5769,932.8462,926.6923,934.0000,932.4615,929.3846,922.0385,932.0769,927.8077,931.6923,931.3077,929.3462,920.5385,920.1154,928.2308,924.3846,928.6154,932.8462,930.5385,922.0769,926.2692,932.0769,932.8462,934.3846,933.2308,931.3077,927.4615,934.0000,925.1538,920.1154,919.7692,929.3846,913.9615,932.8462,925.9231,923.5769,931.6923,934.4231,913.9615,930.9231" +normalizing a fully mergeable tiling of boxes - 2,"medium, embedded in 
3d",100,24,2452800,1058.9946,1057.9879,1060.5850,6.3014,4.3349,11.2512,"1059.0000,1046.5000,1102.4167,1057.7500,1055.6667,1064.8750,1062.7500,1063.2083,1062.3333,1058.1667,1063.6250,1054.0000,1066.1250,1065.6667,1064.0417,1060.2500,1064.8750,1064.8333,1063.2083,1062.3333,1059.4583,1062.7500,1064.0417,1064.4167,1063.6250,1065.6667,1061.1250,1061.0833,1061.9583,1054.8333,1062.3333,1061.9583,1059.0000,1056.9167,1054.8333,1056.5417,1057.7500,1054.0000,1053.1667,1057.7500,1061.1250,1059.4167,1053.5833,1061.0833,1056.5000,1056.9167,1057.3333,1059.0000,1057.7500,1055.2500,1057.7500,1062.7917,1044.3750,1061.5417,1052.7500,1060.6667,1057.7500,1056.5417,1064.4167,1060.2917,1052.3333,1053.5833,1051.0833,1056.9167,1063.1667,1058.2083,1054.8333,1052.7500,1044.7917,1069.4167,1060.2917,1057.3333,1056.0833,1056.9167,1059.0417,1056.0833,1051.5000,1051.5000,1054.0000,1057.3333,1056.9167,1055.2500,1052.3333,1053.5833,1054.0000,1060.6667,1063.6250,1059.0000,1055.6667,1056.5000,1059.8750,1059.4167,1058.1667,1060.7083,1057.3333,1061.9167,1063.2083,1059.0000,1059.4583,1064.0000" +normalizing a fully mergeable tiling of boxes - 2,"large, 
native",100,1,3682900,37417.9000,37338.0300,37616.3900,593.9707,174.9913,1135.5862,"37319.0000,37168.0000,40635.0000,38061.0000,37819.0000,37530.0000,37399.0000,37669.0000,37239.0000,37359.0000,37139.0000,37459.0000,37309.0000,37399.0000,37198.0000,37259.0000,37429.0000,37069.0000,37258.0000,37490.0000,37529.0000,37259.0000,37429.0000,37198.0000,37179.0000,37078.0000,37358.0000,37288.0000,37289.0000,37289.0000,37339.0000,37529.0000,37479.0000,37409.0000,37358.0000,37339.0000,37139.0000,37399.0000,36928.0000,37329.0000,37419.0000,37429.0000,37109.0000,37299.0000,37499.0000,37158.0000,37179.0000,37289.0000,37178.0000,37228.0000,37409.0000,37409.0000,42078.0000,37489.0000,37228.0000,37129.0000,37279.0000,37519.0000,37459.0000,37379.0000,37179.0000,37339.0000,37419.0000,37268.0000,37489.0000,37218.0000,37249.0000,37269.0000,37209.0000,37339.0000,37509.0000,37529.0000,37289.0000,37299.0000,37499.0000,37419.0000,37149.0000,37218.0000,37338.0000,37450.0000,37579.0000,37539.0000,37239.0000,37279.0000,37209.0000,37268.0000,37309.0000,37459.0000,37209.0000,37179.0000,37148.0000,37369.0000,37369.0000,37479.0000,37258.0000,37510.0000,37228.0000,37119.0000,37289.0000,37559.0000" +normalizing a fully mergeable tiling of boxes - 2,"large, embedded in 
3d",100,1,4085200,40948.9300,40888.0000,41090.6900,446.3709,176.1642,818.0107,"40755.0000,40745.0000,43340.0000,41757.0000,40996.0000,41216.0000,41076.0000,40866.0000,41136.0000,40796.0000,40925.0000,40856.0000,40926.0000,40935.0000,40866.0000,40956.0000,40905.0000,41136.0000,41066.0000,40695.0000,40946.0000,40695.0000,40675.0000,40836.0000,40775.0000,40946.0000,40655.0000,41026.0000,44282.0000,40966.0000,40816.0000,40715.0000,41086.0000,40665.0000,40746.0000,40655.0000,40575.0000,40815.0000,40785.0000,41036.0000,40775.0000,40746.0000,40885.0000,40696.0000,40945.0000,40795.0000,40926.0000,40986.0000,40785.0000,40826.0000,40745.0000,41046.0000,40816.0000,40685.0000,41086.0000,40966.0000,41016.0000,40915.0000,40716.0000,40875.0000,40976.0000,40956.0000,40775.0000,40986.0000,40826.0000,40765.0000,41126.0000,40786.0000,40996.0000,40755.0000,40976.0000,41166.0000,40936.0000,40906.0000,40985.0000,40915.0000,40876.0000,40805.0000,41147.0000,40815.0000,40956.0000,40835.0000,40746.0000,41126.0000,40695.0000,41046.0000,40946.0000,41056.0000,40886.0000,40695.0000,41016.0000,40715.0000,40836.0000,40865.0000,40966.0000,41106.0000,40656.0000,40695.0000,41016.0000,40685.0000" +normalizing a fully mergeable tiling of boxes - 3,"small, 
native",100,94,2415800,262.9479,262.5179,264.4197,3.6823,1.0882,8.3973,"263.7766,262.3936,268.1489,261.5319,261.6383,261.9681,264.2021,261.0106,261.8617,261.7447,263.2447,263.5638,263.2447,263.7766,260.1489,262.0745,261.9681,261.8617,262.2872,261.3191,260.7979,263.7766,261.8617,263.2447,262.6064,261.3298,297.6702,264.3085,261.4362,263.5638,264.8404,263.0319,263.4574,263.8830,262.5000,261.1170,261.4362,261.9681,262.2766,262.7128,263.8830,262.1702,262.0745,263.5638,262.9255,262.5000,262.7128,261.7553,261.1170,262.3830,263.6702,262.9255,262.5000,262.7021,263.6702,260.1489,262.0745,261.9681,262.1809,262.2766,263.7766,263.8830,260.3617,263.5638,263.9894,262.8191,262.9255,263.7766,263.8830,262.5000,263.8830,261.9681,261.9681,261.8511,262.2872,260.5851,262.3936,262.2872,263.4574,263.8830,263.9894,263.3511,260.9043,261.4255,261.7447,261.9681,263.0319,263.3511,262.2872,260.7979,262.3830,263.2447,263.7766,262.8191,262.2872,261.8511,260.6915,262.2872,263.7766,263.9894" +normalizing a fully mergeable tiling of boxes - 3,"medium, 
native",100,16,2516800,1577.9200,1576.6731,1581.6519,10.0261,4.0405,22.0724,"1570.3750,1573.5000,1668.0625,1576.0000,1575.3750,1576.0000,1572.2500,1570.9375,1577.2500,1579.1250,1574.1250,1579.1250,1580.3750,1584.7500,1577.1875,1572.8125,1580.3750,1577.2500,1573.5000,1577.8750,1579.1250,1576.0000,1577.8750,1566.0000,1575.3750,1574.1250,1581.6250,1579.1250,1576.0000,1577.2500,1558.4375,1572.8750,1581.6250,1577.2500,1574.1250,1577.2500,1576.0000,1582.8750,1567.2500,1579.7500,1579.7500,1578.5000,1574.7500,1573.4375,1574.1250,1574.1250,1569.7500,1576.6250,1578.5000,1574.1250,1579.7500,1576.0000,1582.2500,1568.5000,1577.2500,1573.4375,1577.1875,1574.0625,1575.3750,1574.7500,1576.0000,1574.1250,1581.6250,1576.6250,1578.5000,1583.5000,1576.0000,1574.7500,1586.0000,1576.0000,1584.1250,1579.6875,1583.5000,1583.5000,1576.0000,1575.3750,1580.3750,1576.6250,1582.2500,1576.6250,1582.8750,1579.7500,1584.1250,1581.6250,1577.8750,1570.3750,1580.3750,1584.1250,1581.6250,1577.2500,1580.3750,1577.8125,1572.1875,1579.7500,1576.6250,1576.0000,1581.0000,1576.6250,1576.6250,1575.3750" +normalizing a fully mergeable tiling of boxes - 3,"large, 
native",100,1,4675000,47880.5400,47598.7600,48461.9000,1967.5497,1105.7194,3488.7917,"47328.0000,47197.0000,61745.0000,57277.0000,53409.0000,50785.0000,50063.0000,49662.0000,48881.0000,50133.0000,49201.0000,48460.0000,47878.0000,48080.0000,47929.0000,47628.0000,47799.0000,47448.0000,49803.0000,49962.0000,49311.0000,48781.0000,48289.0000,47819.0000,47809.0000,47809.0000,50333.0000,47649.0000,47448.0000,47338.0000,47208.0000,47518.0000,47197.0000,47218.0000,47398.0000,46917.0000,47158.0000,47047.0000,47067.0000,47037.0000,47027.0000,47067.0000,47287.0000,46987.0000,47137.0000,47208.0000,47257.0000,47178.0000,47067.0000,47318.0000,47388.0000,47228.0000,47198.0000,47187.0000,47278.0000,47147.0000,46917.0000,47157.0000,47148.0000,47087.0000,47278.0000,47228.0000,47147.0000,47258.0000,47187.0000,47357.0000,47368.0000,47258.0000,46927.0000,47088.0000,47247.0000,47388.0000,47268.0000,47298.0000,47318.0000,47308.0000,46927.0000,47398.0000,47167.0000,47188.0000,47158.0000,47047.0000,47278.0000,47077.0000,47057.0000,47118.0000,47348.0000,47278.0000,47478.0000,47187.0000,47268.0000,47168.0000,47568.0000,47408.0000,47288.0000,47187.0000,47608.0000,47077.0000,47208.0000,47358.0000" +performing set operations between randomized regions - 2d,"union, small, 
native",100,24,2452800,1077.8208,1076.7900,1080.9275,8.3459,3.4928,18.2336,"1074.4583,1074.8750,1152.5000,1086.1667,1080.7083,1085.3333,1085.7083,1077.3750,1084.0833,1079.4583,1077.3750,1072.3750,1079.4583,1075.7083,1078.6667,1082.3750,1085.7500,1082.7917,1081.5417,1071.9583,1073.2083,1067.3750,1078.2083,1074.0417,1078.2083,1076.5417,1073.6250,1078.2500,1072.7500,1074.0000,1074.4583,1078.6250,1079.0417,1082.3750,1075.2917,1075.7083,1079.8750,1078.6250,1077.7917,1067.3750,1078.2083,1079.4583,1076.9583,1075.7083,1078.6250,1074.4583,1078.6250,1083.2083,1078.2500,1075.7083,1075.2917,1079.0417,1075.7083,1077.3750,1078.2083,1077.8333,1072.7500,1074.8750,1073.5833,1076.5417,1075.7083,1078.2083,1079.4583,1074.0417,1078.6250,1079.0833,1079.4583,1071.9583,1074.4583,1083.6250,1074.4583,1078.6250,1076.5417,1077.3750,1076.1250,1076.9583,1075.7083,1078.6250,1073.2083,1082.8333,1081.1250,1079.4583,1076.9583,1079.0833,1078.2083,1076.5417,1071.9583,1076.9583,1073.6250,1075.2917,1074.4583,1078.6250,1075.7083,1076.5417,1071.1250,1071.1250,1075.2917,1077.7917,1071.9583,1070.7083" +performing set operations between randomized regions - 2d,"union, small, embedded in 
3d",100,21,2457000,1232.2724,1231.0986,1236.2157,9.8600,3.3739,22.2573,"1227.9524,1227.9524,1323.8095,1238.4762,1236.5238,1244.1905,1236.0476,1238.0000,1231.2857,1238.4286,1232.7143,1234.1429,1229.3810,1235.5714,1234.1429,1238.9048,1230.8095,1227.4762,1238.4762,1237.4762,1234.6190,1233.2381,1234.6190,1236.0476,1234.6190,1231.7619,1236.0476,1228.4286,1232.2381,1229.8571,1233.7143,1229.3810,1232.2381,1234.6190,1230.8095,1231.3333,1226.5238,1230.8095,1231.7619,1230.3333,1236.0476,1228.4286,1231.3333,1227.4762,1229.3810,1221.7143,1227.9524,1230.8095,1231.2857,1230.3333,1229.8571,1229.8571,1231.2857,1227.9524,1228.9048,1229.9048,1231.7619,1233.6667,1228.4286,1231.2857,1228.4286,1228.9048,1232.7619,1229.8571,1226.5238,1231.2857,1231.7619,1230.3333,1231.2857,1227.9524,1231.2857,1227.4286,1235.5714,1223.6667,1230.8095,1230.3333,1227.9524,1233.1905,1226.0476,1234.1429,1229.8571,1231.7619,1225.5714,1229.8571,1235.0952,1232.7143,1227.0000,1233.2381,1227.4762,1230.3333,1228.9048,1227.0000,1232.7143,1230.8095,1228.9048,1232.7143,1232.7143,1227.4762,1229.8571,1231.2857" +performing set operations between randomized regions - 2d,"intersection, small, 
native",100,103,2410200,238.2253,238.0457,238.5232,1.1545,0.7701,2.0624,"238.2913,238.4951,246.2816,241.5049,240.3398,239.2718,238.9806,236.6408,238.4951,238.0971,238.1068,237.9126,238.3883,236.4563,238.4854,238.3010,238.6893,238.4854,238.4951,238.1068,236.5437,237.9126,238.9709,238.2913,238.1068,238.1942,236.4466,238.8835,237.9029,238.5922,238.4951,238.4854,238.7864,236.7379,237.8155,238.3981,238.8738,237.9029,237.5243,236.5437,238.0000,237.7087,238.9806,238.0000,238.4854,238.7864,236.3495,238.3010,238.3010,238.0971,237.9126,238.4951,237.0291,238.2039,239.0680,237.9126,238.1068,238.0971,238.0097,236.2524,238.4951,238.3981,238.2913,238.7864,238.0097,236.5437,238.3010,238.1942,238.5922,238.6893,238.3883,238.9806,236.3592,237.9029,239.2718,238.7864,237.9029,238.2039,236.5437,238.3010,238.1068,238.4854,238.5922,238.9806,238.2039,236.6408,238.5922,238.2913,238.3981,238.1068,238.4854,237.4272,237.7087,238.1068,238.3981,238.6796,238.0097,238.1068,236.5437,238.3981" +performing set operations between randomized regions - 2d,"intersection, small, embedded in 
3d",100,88,2420000,280.9367,280.7103,281.5351,1.7669,0.8133,3.7626,"280.6250,279.6023,296.2159,284.5000,283.4659,282.4432,280.0568,280.7386,281.3068,280.8523,280.9659,281.0795,278.9205,281.3068,281.1932,280.8523,280.7386,281.4205,281.1932,278.5795,281.0795,280.5114,280.9659,281.4205,282.1023,280.8523,279.8295,280.7386,281.1932,281.0795,280.3977,280.2841,281.0795,279.2500,280.6250,281.1932,280.9659,280.9659,281.1932,281.3068,279.1364,282.2159,280.9545,280.8409,281.3068,280.2727,279.4886,280.9659,281.1932,280.9659,281.3068,280.9659,280.3977,279.3750,281.3068,280.6250,281.3068,281.1932,280.7386,281.5341,279.2614,280.5114,281.5341,281.0795,280.7386,281.1932,280.3977,279.0341,280.9659,280.6136,280.8409,281.1932,281.0795,280.2727,279.1364,280.5114,280.6250,281.3068,281.0795,280.5114,279.6023,280.6250,281.1932,281.5341,280.8523,280.9659,280.9659,279.0341,280.3977,281.5341,281.0795,280.3977,281.9886,280.5114,278.6818,280.7386,280.9659,280.2841,280.8523,280.3977" +performing set operations between randomized regions - 2d,"difference, small, 
native",100,24,2426400,1038.9642,1037.7217,1042.4117,9.7957,4.4481,21.1332,"1032.2917,1036.0417,1124.9583,1047.3333,1046.5000,1046.0417,1050.2083,1044.8333,1045.2500,1046.9167,1038.9583,1042.7500,1043.5417,1042.3333,1043.9583,1042.7500,1048.1667,1037.2917,1034.7917,1042.3333,1045.6667,1040.6250,1045.2500,1041.4583,1040.2500,1036.0417,1040.6667,1042.7083,1044.0000,1039.3750,1038.5833,1042.7083,1035.6250,1042.2917,1036.8750,1039.8333,1046.5000,1038.9583,1040.2083,1034.8333,1040.2083,1038.5833,1040.2083,1035.6250,1039.0000,1041.0417,1036.4583,1034.8333,1039.3750,1037.7083,1032.2917,1037.7083,1038.9583,1033.1250,1033.9583,1035.6250,1036.0417,1036.0417,1033.5417,1039.3750,1036.0417,1031.4583,1039.0000,1041.8750,1035.6667,1039.7917,1039.7917,1040.6250,1034.7917,1040.2500,1032.7083,1032.2917,1034.7917,1032.2917,1036.9167,1031.0417,1032.7083,1035.2083,1032.7083,1034.7917,1032.2917,1038.5417,1028.5417,1038.9583,1039.0000,1032.7083,1034.7917,1037.2917,1035.2500,1031.0417,1037.2917,1036.0417,1037.3333,1035.6250,1031.8750,1032.7083,1026.4583,1035.2083,1033.5417,1037.7500" +performing set operations between randomized regions - 2d,"difference, small, embedded in 
3d",100,21,2490600,1228.7514,1227.6848,1231.3838,8.0035,4.0108,16.7073,"1221.7619,1227.4762,1296.1905,1239.9048,1233.6667,1229.3810,1239.8571,1229.9048,1236.0476,1223.1905,1228.4286,1227.0000,1226.5238,1228.9048,1224.1429,1232.7143,1230.8095,1229.3810,1227.4762,1231.7619,1227.0000,1232.2381,1229.3810,1228.3810,1222.1905,1222.2381,1229.8571,1234.6190,1226.4762,1234.6190,1227.4762,1231.2857,1227.4762,1234.6190,1231.7619,1227.4762,1228.4286,1230.3810,1228.4286,1231.7619,1227.0000,1234.1429,1221.2857,1228.9048,1225.0952,1223.6667,1228.4286,1228.9048,1227.0000,1225.0952,1228.4286,1221.7143,1227.0000,1228.9048,1227.9048,1228.9048,1234.1429,1231.2857,1229.3810,1225.5714,1223.1905,1226.5238,1229.8571,1224.1429,1228.9048,1228.4286,1227.0000,1232.2381,1229.8571,1210.7619,1223.6667,1229.3810,1229.8571,1227.9048,1233.6667,1226.5238,1227.9524,1238.4286,1222.2381,1224.6190,1225.5714,1228.4286,1227.9524,1223.6667,1230.8095,1226.0476,1230.8095,1221.2857,1230.8095,1227.4762,1229.8571,1222.7143,1226.0476,1227.4762,1223.6667,1225.5238,1220.3333,1225.0952,1225.5714,1227.4762" +performing set operations between randomized regions - 2d,"union, medium, 
native",100,2,3202200,14946.5700,14912.4100,15083.3700,306.2121,58.0280,718.1210,"14937.5000,14872.5000,17928.0000,15233.0000,15112.5000,14987.5000,15048.0000,14942.0000,14862.5000,14852.0000,14872.5000,14797.0000,14882.0000,14982.0000,14887.5000,14852.0000,14817.5000,14982.5000,14987.5000,14892.0000,14837.5000,14947.0000,14982.5000,14947.5000,14917.5000,14892.0000,14877.5000,14942.5000,14967.5000,14877.0000,14887.5000,14857.0000,14987.5000,14972.5000,14872.5000,14947.0000,14962.5000,14857.5000,14957.0000,14962.0000,14852.5000,14902.0000,14952.5000,14872.5000,14992.5000,14897.0000,14902.5000,14907.5000,14942.0000,14952.5000,14907.5000,14947.5000,14972.5000,14902.0000,14902.5000,14907.0000,14902.0000,14912.5000,14847.0000,14887.5000,14852.0000,14967.5000,14817.0000,14902.5000,14897.5000,14942.0000,14892.5000,14827.0000,14912.5000,14822.0000,14922.5000,14912.5000,14842.0000,14952.5000,14942.5000,14902.0000,14897.5000,14842.0000,14942.5000,14912.5000,14837.0000,14832.0000,14892.0000,14852.5000,14857.0000,14842.0000,14917.5000,14957.5000,14882.0000,14937.0000,14982.5000,14967.5000,15002.5000,14992.5000,14882.5000,14972.0000,14982.0000,14902.5000,14927.5000,14897.0000" +performing set operations between randomized regions - 2d,"union, medium, embedded in 
3d",100,2,3482600,17916.9450,17873.8600,18025.7600,317.1713,73.4337,572.0912,"17863.0000,17793.0000,20192.0000,18289.0000,17998.0000,17943.0000,17873.0000,17828.0000,17888.0000,17823.0000,17928.0000,17913.0000,17958.0000,17883.0000,17863.0000,17778.0000,17852.5000,18023.5000,17873.0000,17797.5000,17788.0000,17993.0000,17943.5000,17882.5000,17767.5000,17818.0000,17848.0000,17857.5000,17913.0000,17853.0000,19962.0000,17873.0000,17833.0000,17787.5000,17933.5000,17792.5000,17848.0000,17823.0000,17858.0000,17933.0000,17822.5000,17808.0000,17818.0000,17827.5000,17953.0000,17802.5000,17928.0000,17832.5000,17883.0000,17838.0000,17868.0000,17848.0000,17807.5000,17808.0000,17778.0000,17908.0000,17873.0000,17887.5000,17867.5000,17918.0000,17822.5000,17798.0000,17868.0000,17888.0000,17837.5000,17923.5000,17857.5000,17738.0000,17943.0000,17868.0000,17838.0000,17792.5000,17968.5000,17872.5000,17948.0000,17822.5000,17878.0000,17888.0000,17938.0000,17848.0000,17878.0000,17938.0000,17858.0000,17958.0000,17823.0000,17948.0000,17898.0000,17878.0000,17823.0000,17918.0000,17888.0000,17948.0000,17763.0000,17888.0000,17938.0000,17802.5000,17978.0000,17807.5000,17852.5000,17827.5000" +performing set operations between randomized regions - 2d,"intersection, medium, 
native",100,10,2475000,2435.9130,2427.7080,2452.0760,56.3861,34.7878,108.2267,"2382.3000,2373.3000,2581.7000,2517.6000,2499.5000,2501.6000,2495.5000,2500.6000,2485.5000,2494.5000,2464.4000,2493.6000,2438.4000,2450.5000,2477.5000,2473.5000,2417.4000,2446.4000,2424.4000,2455.5000,2476.5000,2475.5000,2380.3000,2411.3000,2426.4000,2394.4000,2415.4000,2428.4000,2472.5000,2424.4000,2444.5000,2436.4000,2403.4000,2414.4000,2397.3000,2414.4000,2411.4000,2392.3000,2368.3000,2419.3000,2420.4000,2399.3000,2390.4000,2413.3000,2382.4000,2427.4000,2423.4000,2429.4000,2435.4000,2397.3000,2399.4000,2401.3000,2396.4000,2861.2000,2401.4000,2473.5000,2411.4000,2402.3000,2454.4000,2482.5000,2438.5000,2442.4000,2454.5000,2457.5000,2436.4000,2435.4000,2417.4000,2439.4000,2495.5000,2494.5000,2455.5000,2415.4000,2452.5000,2432.4000,2424.4000,2388.3000,2413.4000,2432.4000,2424.4000,2411.3000,2418.4000,2374.4000,2409.3000,2416.4000,2415.4000,2452.5000,2435.4000,2405.4000,2434.4000,2395.4000,2435.4000,2413.4000,2420.4000,2405.4000,2401.3000,2414.4000,2408.4000,2416.4000,2387.3000,2387.4000" +performing set operations between randomized regions - 2d,"intersection, medium, embedded in 
3d",100,12,2528400,2140.4867,2138.7092,2142.9592,10.6192,8.1471,17.0758,"2140.5833,2129.6667,2204.0000,2159.8333,2160.5833,2147.2500,2148.9167,2160.5833,2154.7500,2154.7500,2151.4167,2146.4167,2152.2500,2142.1667,2156.4167,2148.9167,2159.7500,2143.9167,2146.4167,2146.4167,2144.7500,2135.5000,2133.8333,2137.2500,2142.2500,2138.0833,2138.8333,2138.0833,2139.7500,2138.9167,2130.5000,2123.0833,2137.2500,2140.5000,2135.5833,2146.4167,2140.5833,2135.5833,2133.0000,2121.4167,2134.6667,2136.4167,2128.0833,2133.8333,2136.4167,2136.4167,2130.5000,2136.4167,2128.0833,2136.3333,2133.0833,2136.4167,2140.5833,2134.6667,2133.9167,2131.4167,2133.0000,2125.5833,2132.1667,2142.1667,2150.5833,2135.5833,2134.7500,2149.7500,2128.0000,2126.4167,2129.6667,2136.3333,2128.0833,2133.8333,2134.6667,2134.7500,2144.7500,2140.5833,2130.5000,2135.5833,2134.7500,2140.5000,2133.0000,2133.9167,2135.5833,2139.6667,2153.9167,2136.3333,2135.5833,2138.0833,2145.5833,2147.2500,2138.8333,2138.8333,2147.2500,2140.5833,2148.9167,2154.7500,2150.5833,2146.4167,2140.5833,2136.4167,2149.7500,2141.4167" +performing set operations between randomized regions - 2d,"difference, medium, 
native",100,3,2451300,8445.0533,8419.7767,8511.7933,188.3770,29.0804,380.5783,"8411.6667,8435.3333,9420.6667,8452.0000,8448.6667,8442.0000,8442.0000,8452.3333,8445.3333,8382.0000,8455.3333,8438.6667,8378.6667,8482.0000,8398.6667,8395.3333,8422.0000,8398.6667,8415.3333,8368.3333,8428.6667,8452.0000,8448.6667,8458.6667,8412.0000,8415.3333,8415.3333,8375.3333,8358.6667,8395.3333,8411.6667,8425.3333,8422.0000,8402.0000,8411.6667,8385.0000,8415.3333,8445.3333,8422.0000,8448.6667,8452.0000,8448.6667,8388.6667,8435.3333,8418.6667,10011.6667,8442.0000,8449.0000,8428.6667,8455.3333,8428.6667,8408.6667,8362.0000,8415.3333,8418.6667,8418.6667,8415.3333,8445.3333,8435.3333,8415.3333,8428.6667,8425.3333,8412.0000,8432.0000,8438.6667,8372.0000,8392.0000,8371.6667,8415.0000,8458.6667,8425.3333,8392.0000,8445.3333,8408.6667,8422.0000,8428.6667,8368.6667,8498.6667,8442.0000,8438.6667,8422.0000,8412.0000,8435.3333,8375.0000,8405.3333,8442.0000,8432.0000,8428.6667,8428.6667,8395.0000,8381.6667,8382.0000,8375.3333,8442.0000,8382.0000,8405.3333,8408.6667,8455.3333,8358.6667,8395.0000" +performing set operations between randomized regions - 2d,"difference, medium, embedded in 
3d",100,3,2616600,9147.8200,9048.7567,9522.5133,857.8682,204.1334,1966.1613,"9003.0000,10118.3333,17275.3333,11174.0000,9874.6667,9333.6667,9223.6667,9250.3333,9203.3333,9096.6667,9116.6667,9153.3333,9076.6667,9163.3333,9090.0000,9090.0000,9066.6667,9126.6667,9120.0000,9079.6667,9069.6667,9076.6667,9070.0000,9116.6667,9016.3333,8963.0000,8953.0000,9150.0000,9073.0000,9113.3333,9090.0000,8999.6667,8983.3333,9019.6667,8999.6667,9030.0000,9053.3333,9029.6667,9043.3333,8973.0000,9036.3333,9006.6667,9056.3333,9023.3333,8979.6667,8993.0000,8973.0000,9013.0000,8979.6667,8963.0000,8989.6667,8999.6667,8963.0000,9039.6667,8993.0000,8999.6667,8966.3333,9036.6667,8983.0000,8966.3333,8969.6667,9016.6667,9009.6667,9016.3333,8953.0000,9049.6667,8939.6667,9020.0000,8979.6667,8993.0000,8946.3333,8989.6667,8986.6667,8989.6667,9029.6667,8959.6667,9046.3333,9053.3333,8939.6667,8956.3333,8983.0000,8969.6667,8989.6667,9003.3333,8993.0000,8973.0000,8953.0000,8986.3333,8986.3333,8946.3333,8959.6667,9023.0000,8959.6667,8946.3333,8976.6667,8932.6667,9006.3333,8963.0000,8966.3333,8999.6667" +performing set operations between randomized regions - 2d,"union, large, 
native",100,1,20288900,205830.3500,204927.7600,209169.5000,7996.2994,1697.3061,18620.8953,"205287.0000,204717.0000,211048.0000,200890.0000,198594.0000,198064.0000,197352.0000,196881.0000,283165.0000,205809.0000,205948.0000,205859.0000,206069.0000,206309.0000,204857.0000,204957.0000,205558.0000,205247.0000,205197.0000,205388.0000,205017.0000,204766.0000,205437.0000,205388.0000,205929.0000,204986.0000,205198.0000,208433.0000,205318.0000,204205.0000,204686.0000,205017.0000,204827.0000,205578.0000,205167.0000,206891.0000,205157.0000,205698.0000,204556.0000,204386.0000,205177.0000,204887.0000,204586.0000,205718.0000,205037.0000,204727.0000,208263.0000,204716.0000,205047.0000,205107.0000,205187.0000,204776.0000,205849.0000,205007.0000,205287.0000,205428.0000,204195.0000,205658.0000,206018.0000,204837.0000,204296.0000,204846.0000,204867.0000,205347.0000,204967.0000,205077.0000,209005.0000,205989.0000,204886.0000,205218.0000,206179.0000,204666.0000,205889.0000,205147.0000,204887.0000,205017.0000,205026.0000,204906.0000,205027.0000,205007.0000,205017.0000,205408.0000,205468.0000,204786.0000,204987.0000,209886.0000,205508.0000,204897.0000,205207.0000,205067.0000,204867.0000,205768.0000,204687.0000,204956.0000,205849.0000,204957.0000,204736.0000,205618.0000,204887.0000,204887.0000" +performing set operations between randomized regions - 2d,"union, large, embedded in 
3d",100,1,21382600,217603.8800,217435.4500,217892.3600,1096.9911,718.8270,1662.6519,"217120.0000,217170.0000,223913.0000,218262.0000,217872.0000,217160.0000,217360.0000,217691.0000,217090.0000,217300.0000,217130.0000,217611.0000,217761.0000,217431.0000,217701.0000,216809.0000,217180.0000,219895.0000,216769.0000,217241.0000,217570.0000,217281.0000,217340.0000,217971.0000,217610.0000,217571.0000,217050.0000,217641.0000,216909.0000,217080.0000,216789.0000,217620.0000,217130.0000,217781.0000,216659.0000,216939.0000,220937.0000,217771.0000,217199.0000,217400.0000,217561.0000,217691.0000,217160.0000,217280.0000,218122.0000,217300.0000,217761.0000,217801.0000,216870.0000,217150.0000,217701.0000,217350.0000,216850.0000,216779.0000,221769.0000,217340.0000,217350.0000,217100.0000,217381.0000,217350.0000,217260.0000,217641.0000,217711.0000,217811.0000,217220.0000,217380.0000,217160.0000,217410.0000,217351.0000,217931.0000,216950.0000,217130.0000,222500.0000,217631.0000,216919.0000,217611.0000,216669.0000,217020.0000,217120.0000,217300.0000,217871.0000,217330.0000,217771.0000,218022.0000,217049.0000,217401.0000,216689.0000,218142.0000,217290.0000,217180.0000,217170.0000,219825.0000,217030.0000,217841.0000,217321.0000,217881.0000,217220.0000,217150.0000,217180.0000,216920.0000" +performing set operations between randomized regions - 2d,"intersection, large, 
native",100,2,4135000,20942.1950,20912.0100,21015.9650,223.6835,72.1565,400.9531,"20984.0000,20853.5000,22597.0000,21219.5000,21003.5000,20929.0000,21024.0000,20923.5000,21004.0000,20823.5000,20944.0000,20898.5000,20939.0000,20898.5000,20773.5000,20948.5000,20883.5000,21019.0000,20903.5000,20929.0000,20898.5000,20928.5000,20919.0000,20898.5000,21004.0000,20919.0000,20953.5000,20813.5000,20894.0000,20858.5000,20843.5000,20893.5000,20844.0000,20943.5000,20833.5000,20889.0000,20798.5000,20838.5000,20918.5000,20873.5000,20968.5000,20848.5000,20944.0000,20868.5000,20964.0000,20833.5000,20969.0000,20848.5000,20868.5000,20918.5000,20853.5000,21003.5000,20828.5000,20958.5000,20818.5000,20944.0000,20923.5000,20879.0000,20993.5000,20849.0000,20958.5000,20808.5000,20949.0000,20863.5000,20898.5000,20913.5000,20928.5000,20999.0000,20813.5000,21059.0000,20894.0000,20988.5000,20879.0000,20928.5000,20964.0000,20933.5000,22241.5000,20838.5000,20923.5000,21019.0000,20949.0000,20793.5000,20878.5000,20803.5000,20873.5000,20949.0000,20938.5000,20948.5000,20878.5000,20944.0000,20863.5000,20939.0000,20783.5000,20858.5000,21069.0000,20873.5000,20974.0000,20883.5000,20994.0000,20878.5000" +performing set operations between randomized regions - 2d,"intersection, large, embedded in 
3d",100,2,3996000,20347.2650,20316.4700,20417.4150,224.9979,80.7172,393.2798,"20338.0000,20397.5000,21795.0000,20748.0000,20383.0000,20462.5000,20418.0000,20428.0000,20357.5000,20347.5000,20252.5000,20337.5000,20293.0000,20287.5000,20402.5000,20377.5000,20282.5000,20342.5000,20468.0000,20197.0000,20388.0000,20207.5000,20357.5000,20257.5000,20417.5000,20343.0000,20367.5000,20262.5000,20302.5000,20393.0000,20307.5000,20302.5000,20182.5000,20347.5000,20247.5000,20267.5000,20252.5000,20287.5000,20232.5000,20382.5000,20242.5000,20352.5000,20267.5000,20383.0000,20232.0000,20312.5000,20212.0000,20252.5000,20323.0000,20282.5000,20312.5000,20272.5000,20267.5000,20272.5000,20247.5000,20222.0000,20377.5000,20252.5000,20302.5000,20257.5000,20317.5000,21850.5000,20307.5000,20317.5000,20338.0000,20367.5000,20217.5000,20432.5000,20227.5000,20327.5000,20247.0000,20247.5000,20287.5000,20343.0000,20192.0000,20308.0000,20132.0000,20312.5000,20292.5000,20282.5000,20292.5000,20282.5000,20403.0000,20337.5000,20418.0000,20267.5000,20412.5000,20348.0000,20337.5000,20272.5000,20392.5000,20262.5000,20393.0000,20347.5000,20382.5000,20297.5000,20337.5000,20232.5000,20337.5000,20357.5000" +performing set operations between randomized regions - 2d,"difference, large, 
native",100,1,62317800,636077.8700,630964.1700,640219.2300,23237.6678,19364.9433,26633.5634,"583695.0000,586922.0000,647967.0000,646935.0000,644600.0000,646444.0000,648408.0000,652085.0000,645302.0000,650362.0000,644781.0000,645963.0000,646244.0000,652315.0000,644591.0000,646514.0000,647286.0000,649731.0000,645151.0000,648529.0000,648157.0000,646625.0000,649329.0000,643339.0000,644700.0000,646774.0000,649961.0000,650893.0000,645051.0000,646915.0000,647105.0000,650853.0000,644300.0000,649189.0000,647326.0000,648067.0000,644050.0000,645372.0000,649480.0000,645983.0000,645662.0000,647106.0000,644450.0000,650512.0000,652576.0000,645612.0000,648187.0000,648518.0000,646484.0000,646374.0000,647626.0000,643519.0000,645412.0000,645222.0000,644611.0000,646173.0000,651314.0000,646845.0000,646344.0000,644300.0000,644440.0000,642667.0000,655031.0000,645282.0000,649951.0000,644400.0000,643929.0000,645342.0000,643328.0000,653257.0000,646735.0000,647185.0000,649671.0000,646895.0000,644831.0000,648247.0000,647275.0000,647767.0000,646023.0000,643469.0000,646224.0000,646454.0000,662024.0000,597882.0000,587041.0000,590308.0000,587022.0000,583915.0000,600277.0000,590728.0000,584086.0000,588735.0000,591129.0000,583986.0000,590638.0000,595678.0000,587012.0000,583705.0000,587502.0000,583575.0000" +performing set operations between randomized regions - 2d,"difference, large, embedded in 
3d",100,1,68458200,680331.1600,673002.2600,686548.3500,34292.9860,30015.1362,37523.3745,"700026.0000,699635.0000,640172.0000,621056.0000,619994.0000,618511.0000,627408.0000,620995.0000,622498.0000,631726.0000,618390.0000,623640.0000,616838.0000,623982.0000,619934.0000,622298.0000,623421.0000,620615.0000,628851.0000,616377.0000,617239.0000,628109.0000,624743.0000,617199.0000,617028.0000,624743.0000,616528.0000,628089.0000,699926.0000,700286.0000,698383.0000,700216.0000,699324.0000,704313.0000,700086.0000,699886.0000,700747.0000,699965.0000,701248.0000,702491.0000,700567.0000,701458.0000,699745.0000,698724.0000,700737.0000,703142.0000,699044.0000,698903.0000,700517.0000,699856.0000,701999.0000,699315.0000,699294.0000,699956.0000,701017.0000,699905.0000,702791.0000,699655.0000,699846.0000,700667.0000,700857.0000,699775.0000,703762.0000,701157.0000,699886.0000,700717.0000,700256.0000,702671.0000,700246.0000,700376.0000,699084.0000,699535.0000,699495.0000,703973.0000,700907.0000,700737.0000,699114.0000,700406.0000,700136.0000,702751.0000,700617.0000,699906.0000,700657.0000,700116.0000,698042.0000,705256.0000,700176.0000,700046.0000,700316.0000,700407.0000,704434.0000,699855.0000,700387.0000,700537.0000,700486.0000,699746.0000,702470.0000,698513.0000,701258.0000,699996.0000" +performing set operations between randomized regions - 3d,"union, small, 
native",100,5,2562500,5297.5980,5290.1060,5326.0180,66.9094,14.2194,156.2647,"5285.6000,5277.6000,5946.8000,5317.8000,5287.6000,5313.8000,5283.6000,5303.6000,5327.8000,5297.6000,5295.8000,5303.6000,5307.6000,5291.8000,5299.6000,5291.6000,5301.8000,5251.6000,5283.6000,5309.6000,5291.8000,5293.6000,5299.6000,5267.6000,5255.6000,5263.6000,5271.6000,5285.6000,5295.6000,5293.6000,5273.8000,5283.6000,5271.6000,5267.6000,5291.6000,5285.6000,5283.6000,5279.6000,5289.6000,5293.6000,5297.6000,5285.6000,5273.6000,5309.8000,5273.6000,5275.6000,5279.6000,5277.6000,5273.6000,5299.8000,5293.6000,5279.6000,5291.8000,5283.6000,5291.6000,5293.6000,5295.8000,5287.6000,5295.6000,5281.6000,5295.6000,5293.6000,5309.8000,5307.6000,5315.8000,5291.6000,5297.6000,5291.8000,5315.6000,5323.8000,5287.6000,5311.6000,5307.8000,5303.6000,5305.8000,5301.6000,5301.6000,5301.8000,5301.6000,5287.6000,5291.6000,5299.6000,5289.6000,5295.8000,5285.6000,5303.6000,5271.6000,5303.6000,5307.6000,5257.6000,5275.6000,5293.8000,5275.6000,5269.6000,5279.6000,5279.6000,5265.6000,5299.8000,5317.6000,5285.8000" +performing set operations between randomized regions - 3d,"intersection, small, 
native",100,167,2404800,146.0239,145.8199,146.7146,1.7107,0.5494,3.8496,"146.0778,145.8323,149.7904,148.0539,144.9940,146.7365,147.0359,147.0898,146.3772,161.8503,146.3772,145.6527,146.1916,146.1377,146.1317,146.3713,144.6886,145.8982,145.8922,145.8323,146.1976,146.0120,144.6946,146.0719,146.2515,145.7725,146.4371,145.7126,145.2934,145.6527,145.5988,146.0120,146.0719,145.1737,145.1737,145.7126,146.0120,145.9521,145.8922,145.7725,145.7725,144.9940,145.6587,145.9521,145.6527,145.9581,146.0120,145.4731,145.8323,145.3593,146.0120,145.7725,145.8383,144.6886,145.7126,145.7725,145.9521,145.8982,145.9521,145.8922,144.9341,146.0180,145.7126,145.8323,145.7784,145.7725,144.9940,145.8922,145.7784,145.9521,146.1916,145.4132,145.0539,145.9521,145.8323,146.1377,145.8922,145.8323,144.9341,145.9581,146.1916,145.9521,145.7784,145.7725,146.0719,144.8743,145.4132,145.5928,145.8922,146.1317,145.8982,145.3533,146.1916,145.8982,146.1916,145.8922,145.8982,145.2335,145.5329,146.1317" +performing set operations between randomized regions - 3d,"difference, small, 
native",100,19,2513700,1199.8795,1196.1753,1215.2058,32.3772,2.6095,75.3830,"1197.4211,1196.4211,1271.7895,1205.3684,1195.3158,1192.1579,1192.6842,1194.2632,1194.7895,1194.2632,1199.0526,1195.3158,1193.7368,1192.6842,1193.7368,1195.3158,1191.6316,1191.1053,1193.2105,1193.7368,1194.2632,1196.8947,1193.2105,1194.2632,1196.4211,1194.2632,1193.7368,1193.7368,1194.2632,1195.8421,1194.7895,1194.7895,1192.1579,1197.9474,1195.3158,1192.1579,1195.3158,1194.8421,1194.7895,1192.1579,1196.3684,1193.2105,1198.4737,1195.3684,1195.8421,1199.5263,1195.8421,1194.2632,1197.9474,1197.4211,1195.8421,1197.9474,1196.3684,1193.7368,1195.8421,1196.8947,1195.3158,1202.2105,1193.7368,1198.4737,1194.7895,1197.4737,1196.3684,1197.4211,1197.9474,1196.4211,1194.2632,1194.7895,1193.7368,1196.8947,1196.4211,1197.4211,1192.1579,1197.4211,1199.0000,1195.8947,1194.7895,1194.2632,1197.4211,1194.2632,1512.2632,1200.5789,1199.5789,1197.9474,1196.3684,1197.4211,1197.4737,1196.8947,1196.8947,1195.8421,1198.5263,1194.2632,1197.4211,1199.5263,1198.5263,1197.4211,1197.9474,1200.6316,1199.0000,1198.4737" +performing set operations between randomized regions - 3d,"union, medium, 
native",100,1,2707900,26533.4700,26394.0900,26729.5100,836.0478,637.5151,1041.7174,"28582.0000,28703.0000,28873.0000,26438.0000,26318.0000,26238.0000,26258.0000,26218.0000,26278.0000,26067.0000,26238.0000,26268.0000,26027.0000,26187.0000,26408.0000,26248.0000,26208.0000,26358.0000,26208.0000,26078.0000,26157.0000,26158.0000,26278.0000,26228.0000,26268.0000,26328.0000,26248.0000,26268.0000,26117.0000,26258.0000,26468.0000,26317.0000,26277.0000,26288.0000,26268.0000,26278.0000,26008.0000,26278.0000,26207.0000,26369.0000,26047.0000,26108.0000,26268.0000,26107.0000,26138.0000,26037.0000,26258.0000,26308.0000,26198.0000,26328.0000,26298.0000,26218.0000,26218.0000,26147.0000,26128.0000,26128.0000,26147.0000,26048.0000,26308.0000,26208.0000,26188.0000,26137.0000,26328.0000,26328.0000,26138.0000,26208.0000,26338.0000,26137.0000,26087.0000,26318.0000,26158.0000,26158.0000,26107.0000,26198.0000,26258.0000,26158.0000,26187.0000,26348.0000,26429.0000,26237.0000,26317.0000,26237.0000,26168.0000,26268.0000,26288.0000,26278.0000,26228.0000,26288.0000,26208.0000,26368.0000,26278.0000,29674.0000,28743.0000,28632.0000,28683.0000,28662.0000,28743.0000,28632.0000,28733.0000,28562.0000" +performing set operations between randomized regions - 3d,"intersection, medium, 
native",100,11,2581700,2357.7809,2354.3109,2371.2064,30.5504,6.5093,70.7856,"2345.1818,2366.1818,2407.0909,2367.9091,2347.9091,2361.5455,2346.0909,2354.2727,2359.7273,2357.0000,2351.5455,2345.1818,2356.0909,2347.0000,2363.4545,2356.0909,2346.0909,2357.9091,2357.0909,2362.4545,2354.2727,2358.0000,2357.0000,2347.0000,2353.3636,2366.1818,2354.2727,2351.5455,2351.5455,2356.1818,2356.0909,2361.5455,2356.0909,2357.0000,2354.2727,2352.4545,2353.3636,2347.0000,2355.2727,2350.6364,2351.5455,2351.5455,2343.3636,2349.7273,2349.7273,2356.0909,2357.9091,2361.5455,2358.0000,2361.5455,2363.3636,2351.5455,2350.6364,2352.4545,2357.9091,2360.7273,2651.1818,2349.7273,2357.0000,2357.0000,2356.0909,2351.5455,2337.9091,2352.4545,2351.5455,2350.6364,2358.8182,2364.2727,2357.0909,2358.8182,2345.1818,2347.0000,2368.0000,2350.6364,2347.9091,2360.6364,2354.2727,2348.8182,2351.5455,2352.4545,2350.6364,2358.8182,2347.9091,2354.2727,2360.6364,2354.2727,2352.5455,2355.1818,2353.3636,2357.0000,2337.0000,2352.4545,2348.8182,2357.0000,2362.4545,2347.9091,2351.5455,2359.7273,2365.1818,2344.2727" +performing set operations between randomized regions - 3d,"difference, medium, 
native",100,3,3485100,12202.0567,12175.9800,12268.9200,195.2276,49.1211,384.9867,"12125.6667,12119.0000,13264.3333,12336.3333,12195.6667,12212.6667,12209.0000,12175.6667,12236.0000,12155.6667,12105.6667,12225.6667,12122.3333,12189.0000,12209.3333,12259.0000,12199.0000,12236.0000,12172.3333,13752.0000,12169.0000,12172.3333,12266.0000,12132.3333,12276.0000,12132.3333,12042.3333,12149.0000,12172.6667,12155.6667,12125.6667,12185.6667,12232.3333,12145.6667,12126.0000,12192.3333,12169.0000,12115.6667,12149.3333,12202.3333,12229.3333,12162.3333,12139.0000,12209.3333,12142.3333,12172.6667,12182.3333,12232.6667,12192.3333,12222.6667,12139.0000,12239.3333,12142.3333,12142.3333,12155.6667,12166.0000,12145.6667,12155.6667,12162.6667,12242.3333,12192.6667,12175.6667,12122.3333,12252.6667,12246.0000,12162.3333,12222.6667,12112.3333,12219.3333,12119.0000,12142.3333,12252.6667,12152.3333,12199.0000,12149.0000,12212.3333,12152.6667,12182.3333,12222.6667,12215.6667,12152.6667,12139.0000,12212.3333,12156.0000,12099.0000,12099.0000,12125.6667,12162.3333,12175.6667,12122.3333,12162.3333,12182.3333,12145.6667,12169.3333,12162.3333,12219.3333,12192.3333,12162.6667,12129.0000,12142.3333" +performing set operations between randomized regions - 3d,"union, large, 
native",100,1,297510300,2966247.7700,2942202.4900,2987962.3000,116648.1518,106809.4330,123105.2661,"2801432.0000,2804507.0000,2995009.0000,2807773.0000,2795920.0000,2786593.0000,2810729.0000,2807823.0000,2788347.0000,2795760.0000,2796843.0000,3065432.0000,3051526.0000,3056505.0000,3050915.0000,3052527.0000,3041066.0000,3045845.0000,3053440.0000,3052237.0000,3055693.0000,3047148.0000,3061995.0000,3044051.0000,3045354.0000,3049632.0000,3057147.0000,3042438.0000,3055523.0000,3047267.0000,3054782.0000,3056154.0000,3029955.0000,3043991.0000,3046116.0000,3047077.0000,3063819.0000,2856816.0000,2794428.0000,2795750.0000,2795571.0000,2790180.0000,2823613.0000,2792204.0000,2793136.0000,2802403.0000,2796522.0000,2964801.0000,3049021.0000,3050053.0000,3053780.0000,3044893.0000,3051425.0000,3056205.0000,3051966.0000,3047518.0000,3049712.0000,3063188.0000,3051866.0000,3047608.0000,3046005.0000,3050584.0000,3054862.0000,3049552.0000,3057417.0000,3048199.0000,3054802.0000,3045545.0000,3054571.0000,3045564.0000,3055162.0000,3055764.0000,3048941.0000,3050153.0000,3046917.0000,3049332.0000,3047918.0000,3059380.0000,3055403.0000,3038872.0000,3047568.0000,3065983.0000,2801762.0000,2794869.0000,2798666.0000,2786142.0000,2801011.0000,2808855.0000,2800780.0000,2808385.0000,2802163.0000,2799828.0000,2857778.0000,3055453.0000,3064720.0000,3048239.0000,3047658.0000,2890962.0000,2791141.0000,2806841.0000" +performing set operations between randomized regions - 3d,"intersection, large, 
native",100,2,2843200,13554.3150,13541.4350,13575.3850,82.2617,57.6150,147.2708,"13675.0000,13615.0000,14116.0000,13514.5000,13434.5000,13560.0000,13499.5000,13544.5000,13590.0000,13605.0000,13489.5000,13514.5000,13515.0000,13589.5000,13555.0000,13514.5000,13545.0000,13509.5000,13535.0000,13539.5000,13534.5000,13574.5000,13530.0000,13524.5000,13665.0000,13665.0000,13594.5000,13529.5000,13494.5000,13590.0000,13615.0000,13604.5000,13700.0000,13565.0000,13489.5000,13660.0000,13504.5000,13575.0000,13700.0000,13514.5000,13554.5000,13575.0000,13564.5000,13519.5000,13664.5000,13584.5000,13550.0000,13554.5000,13625.0000,13474.5000,13509.5000,13455.0000,13554.5000,13509.5000,13465.0000,13655.0000,13504.5000,13489.5000,13524.5000,13494.5000,13494.5000,13555.0000,13489.5000,13494.5000,13570.0000,13489.5000,13519.5000,13630.0000,13580.0000,13539.5000,13560.0000,13559.5000,13470.0000,13569.5000,13575.0000,13549.5000,13499.5000,13499.5000,13554.5000,13575.0000,13549.5000,13539.5000,13474.5000,13580.0000,13509.5000,13434.5000,13550.0000,13499.5000,13479.5000,13500.0000,13454.5000,13489.5000,13660.0000,13429.5000,13600.0000,13654.5000,13519.5000,13600.0000,13594.5000,13620.0000" +performing set operations between randomized regions - 3d,"difference, large, 
native",100,1,666866200,6604205.3800,6521438.2000,6672989.2300,382658.9252,326340.7184,432510.0845,"6795828.0000,6801878.0000,6953336.0000,7273242.0000,6642165.0000,6120606.0000,6956562.0000,6829891.0000,6802690.0000,6804944.0000,6793963.0000,6808231.0000,6795747.0000,6788433.0000,6798692.0000,6799905.0000,6661252.0000,5848751.0000,5818273.0000,5831909.0000,5825437.0000,5963158.0000,6978874.0000,6917899.0000,6818801.0000,6822267.0000,6798933.0000,6807780.0000,6797891.0000,6794394.0000,6794525.0000,6802891.0000,6860209.0000,6802089.0000,6796728.0000,6806527.0000,6807510.0000,6800716.0000,6803532.0000,6803292.0000,6801167.0000,6795046.0000,6797280.0000,6798532.0000,6794064.0000,6795707.0000,6696709.0000,5850174.0000,5816099.0000,5824926.0000,5793005.0000,5825036.0000,6788592.0000,6798142.0000,6795507.0000,6798973.0000,6800596.0000,6801228.0000,6792040.0000,6801548.0000,6799665.0000,6798371.0000,6795016.0000,6805325.0000,6797240.0000,6797931.0000,6789816.0000,6543008.0000,5832250.0000,6135224.0000,6796929.0000,6810886.0000,6799294.0000,6678645.0000,5832149.0000,6356144.0000,6801959.0000,6807149.0000,6801488.0000,6803721.0000,6048791.0000,5831448.0000,5823783.0000,5827370.0000,5816750.0000,6107783.0000,6799264.0000,6804814.0000,6795776.0000,6798341.0000,6803071.0000,6390549.0000,5913624.0000,6797661.0000,6792090.0000,6803191.0000,6799594.0000,6796378.0000,6791328.0000,6802600.0000" +"normalizing a fully mergeable, complex tiling of boxes - 2d","small, 
native",100,9,2508300,2788.7800,2755.0178,2820.8300,168.1602,158.8960,180.7820,"2914.2222,2910.8889,3107.8889,2621.4444,2599.2222,2598.0000,2584.6667,2601.4444,2606.8889,2599.2222,2590.2222,2584.6667,2576.8889,2570.1111,2569.1111,2576.8889,2573.5556,2593.5556,2582.4444,2576.8889,2573.5556,2590.3333,2579.1111,2581.3333,2575.7778,2571.3333,2584.6667,2558.0000,2581.3333,2575.7778,2575.7778,2584.6667,2574.6667,2574.6667,2583.5556,2593.5556,2575.7778,2569.1111,2566.8889,2568.0000,2586.8889,2568.0000,3138.0000,2920.8889,2922.0000,2925.3333,2926.4444,2914.2222,2916.4444,2912.0000,2914.1111,2904.2222,2905.3333,2905.3333,2907.4444,2913.1111,2926.4444,2916.4444,2925.3333,2908.6667,2915.3333,2918.6667,2922.0000,2911.8889,2919.6667,2925.3333,2907.4444,2906.4444,2919.7778,2912.0000,2913.1111,2914.1111,2915.3333,2916.4444,2918.6667,2922.0000,2920.8889,2919.7778,2909.7778,2923.1111,2916.4444,2899.6667,2912.0000,2904.2222,2903.1111,2920.7778,2914.1111,2907.5556,2903.1111,2906.3333,2906.3333,2891.8889,2912.0000,2907.5556,2918.5556,2921.8889,2910.7778,2918.6667,2918.6667,2919.7778" +"normalizing a fully mergeable, complex tiling of boxes - 2d","small, embedded in 
3d",100,8,2652000,3355.1638,3345.8275,3378.5150,67.6566,11.8830,121.7988,"3332.3750,3357.3750,3804.5000,3371.1250,3363.6250,3342.3750,3358.6250,3358.6250,3337.3750,3386.1250,3344.7500,3343.6250,3371.1250,3368.6250,3338.5000,3362.3750,3354.8750,3344.7500,3358.6250,3361.1250,3333.5000,3357.3750,3356.1250,3336.1250,3354.8750,3357.3750,3329.8750,3348.6250,3346.1250,3358.6250,3364.8750,3349.8750,3346.1250,3336.0000,3356.0000,3349.7500,3354.8750,3346.1250,3341.1250,3334.8750,3352.3750,3337.3750,3341.0000,3347.3750,3349.8750,3326.1250,3349.8750,3348.6250,3331.0000,3337.3750,3344.8750,3331.1250,3331.0000,3342.3750,3349.8750,3324.8750,3336.0000,3342.3750,3329.8750,3349.8750,3354.8750,3352.3750,3344.8750,3346.0000,3328.5000,3324.8750,3347.3750,3343.5000,3338.6250,3349.8750,3338.6250,3336.1250,3337.2500,3336.1250,3339.8750,3326.0000,3347.2500,3334.8750,3321.1250,3838.2500,3323.5000,3347.2500,3357.3750,3357.3750,3341.0000,3344.8750,3357.3750,3344.8750,3348.6250,3361.1250,3329.8750,3343.6250,3349.8750,3336.0000,3347.3750,3352.3750,3343.6250,3331.1250,3343.5000,3347.2500" +"normalizing a fully mergeable, complex tiling of boxes - 2d","large, 
native",100,1,549754700,5429455.3600,5410329.2300,5446334.4900,91892.8841,80412.3972,103121.6516,"5455114.0000,5460424.0000,5458571.0000,5466286.0000,5459843.0000,5459062.0000,5460845.0000,5458020.0000,5459934.0000,5462889.0000,5457128.0000,5460023.0000,5319406.0000,5245005.0000,5353571.0000,5463661.0000,5457940.0000,5465163.0000,5458670.0000,5461436.0000,5461908.0000,5457248.0000,5458621.0000,5559142.0000,5313536.0000,5315850.0000,5341489.0000,5343182.0000,5343823.0000,5385993.0000,5508837.0000,5593006.0000,5510589.0000,5511050.0000,5512744.0000,5507383.0000,5508044.0000,5568679.0000,5508366.0000,5511561.0000,5461056.0000,5395772.0000,5251508.0000,5244445.0000,5242050.0000,5251758.0000,5268310.0000,5373359.0000,5513435.0000,5504368.0000,5505540.0000,5510089.0000,5504988.0000,5509087.0000,5462859.0000,5245357.0000,5240768.0000,5247069.0000,5246439.0000,5269643.0000,5306742.0000,5508526.0000,5511271.0000,5514036.0000,5507834.0000,5509368.0000,5508556.0000,5508094.0000,5513103.0000,5464673.0000,5460013.0000,5463380.0000,5461496.0000,5577266.0000,5317404.0000,5246388.0000,5274131.0000,5274802.0000,5262179.0000,5332271.0000,5460665.0000,5459693.0000,5463620.0000,5463170.0000,5457698.0000,5463680.0000,5460745.0000,5457379.0000,5463600.0000,5459262.0000,5464462.0000,5457148.0000,5458971.0000,5464252.0000,5459813.0000,5455224.0000,5462078.0000,5460504.0000,5457058.0000,5463039.0000" +"normalizing a fully mergeable, complex tiling of boxes - 2d","large, embedded in 
3d",100,1,569989300,5635666.3900,5613324.1600,5654540.0200,104470.5063,91025.3282,115945.3939,"5699899.0000,5698015.0000,5743672.0000,5707363.0000,5602664.0000,5432702.0000,5441308.0000,5446588.0000,5447961.0000,5448902.0000,5624807.0000,5709547.0000,5721209.0000,5714556.0000,5696121.0000,5710539.0000,5697474.0000,5706832.0000,5698626.0000,5704898.0000,5701592.0000,5698315.0000,5691393.0000,5697875.0000,5555424.0000,5458791.0000,5428243.0000,5425268.0000,5413365.0000,5442079.0000,5619667.0000,5701101.0000,5696332.0000,5693797.0000,5707864.0000,5699858.0000,5704507.0000,5705750.0000,5697895.0000,5693697.0000,5701121.0000,5707092.0000,5702653.0000,5700640.0000,5712323.0000,5693767.0000,5705439.0000,5701121.0000,5707924.0000,5698675.0000,5714476.0000,5695340.0000,5689138.0000,5690341.0000,5484901.0000,5472396.0000,5599178.0000,5669381.0000,5668729.0000,5660644.0000,5697363.0000,5697093.0000,5702793.0000,5709727.0000,5702173.0000,5706601.0000,5699668.0000,5697154.0000,5693857.0000,5701201.0000,5697584.0000,5691663.0000,5699017.0000,5699879.0000,5689018.0000,5703235.0000,5700439.0000,5689979.0000,5695951.0000,5695871.0000,5533202.0000,5419477.0000,5438223.0000,5487555.0000,5707994.0000,5550114.0000,5444253.0000,5575373.0000,5698957.0000,5699548.0000,5690440.0000,5703254.0000,5698666.0000,5542029.0000,5441288.0000,5446668.0000,5447300.0000,5443522.0000,5472617.0000,5694118.0000" diff --git a/ci/perf/gpuc2_bench.md b/ci/perf/gpuc2_bench.md index 4c0aeb17e..d9e8aaff7 100644 --- a/ci/perf/gpuc2_bench.md +++ b/ci/perf/gpuc2_bench.md @@ -2,96 +2,151 @@ | Metadata | | | :------- | :------------------- | -| Created | 2023-07-13T13:15:07Z | +| Created | 2023-08-15T11:05:25Z | | Test case | Benchmark name | Min | Mean | Std dev | | :------------------------------------------------------------------------------------------------------------------------------------------------ | :----------------------------------------------- | -----------: | -----------: | ---------: | -| 
benchmark intrusive graph dependency handling with N nodes - 1 | creating nodes | 4.47 | 4.47 | 0.01 | -| benchmark intrusive graph dependency handling with N nodes - 1 | creating and adding dependencies | 22.00 | 22.29 | 1.00 | -| benchmark intrusive graph dependency handling with N nodes - 1 | adding and removing dependencies | 15.45 | 15.51 | 0.06 | -| benchmark intrusive graph dependency handling with N nodes - 1 | checking for dependencies | 1.69 | 1.69 | 0.00 | -| benchmark intrusive graph dependency handling with N nodes - 10 | creating nodes | 38.87 | 38.99 | 0.53 | -| benchmark intrusive graph dependency handling with N nodes - 10 | creating and adding dependencies | 242.08 | 243.30 | 0.55 | -| benchmark intrusive graph dependency handling with N nodes - 10 | adding and removing dependencies | 208.75 | 211.64 | 0.65 | -| benchmark intrusive graph dependency handling with N nodes - 10 | checking for dependencies | 21.00 | 21.45 | 0.14 | -| benchmark intrusive graph dependency handling with N nodes - 100 | creating nodes | 385.44 | 388.28 | 5.05 | -| benchmark intrusive graph dependency handling with N nodes - 100 | creating and adding dependencies | 3'935.33 | 3'975.69 | 77.85 | -| benchmark intrusive graph dependency handling with N nodes - 100 | adding and removing dependencies | 4'688.33 | 4'731.41 | 15.46 | -| benchmark intrusive graph dependency handling with N nodes - 100 | checking for dependencies | 1'931.92 | 1'939.45 | 3.68 | -| benchmark task handling > without access thread | generating and deleting tasks | 2'961'050.00 | 3'402'787.39 | 208'539.60 | -| benchmark task handling > with access thread | generating and deleting tasks with access thread | 6'592'884.00 | 7'187'649.30 | 205'994.12 | -| generating large task graphs | soup topology | 1'062'781.00 | 1'230'049.89 | 125'844.91 | -| generating large task graphs | chain topology | 41'235.00 | 41'601.62 | 533.55 | -| generating large task graphs | expanding tree topology | 61'704.00 | 62'903.18 
| 905.79 | -| generating large task graphs | contracting tree topology | 99'054.00 | 99'968.32 | 745.77 | -| generating large task graphs | wave\_sim topology | 392'942.00 | 395'342.44 | 3'425.43 | -| generating large task graphs | jacobi topology | 115'125.00 | 118'149.01 | 6'373.35 | -| generating large command graphs for N nodes - 1 | soup topology | 1'658'130.00 | 1'935'466.23 | 189'986.50 | -| generating large command graphs for N nodes - 1 | chain topology | 135'234.00 | 136'341.02 | 1'304.69 | -| generating large command graphs for N nodes - 1 | expanding tree topology | 160'080.00 | 175'669.04 | 12'153.69 | -| generating large command graphs for N nodes - 1 | contracting tree topology | 194'205.00 | 229'446.59 | 6'413.87 | -| generating large command graphs for N nodes - 1 | wave\_sim topology | 944'186.00 | 1'073'548.20 | 83'059.94 | -| generating large command graphs for N nodes - 1 | jacobi topology | 391'950.00 | 395'045.84 | 2'176.77 | -| generating large command graphs for N nodes - 4 | soup topology | 2'023'833.00 | 2'266'553.74 | 221'082.04 | -| generating large command graphs for N nodes - 4 | chain topology | 364'709.00 | 369'353.67 | 2'336.53 | -| generating large command graphs for N nodes - 4 | expanding tree topology | 400'195.00 | 425'830.47 | 27'770.12 | -| generating large command graphs for N nodes - 4 | contracting tree topology | 432'747.00 | 460'596.50 | 33'147.72 | -| generating large command graphs for N nodes - 4 | wave\_sim topology | 1'971'555.00 | 2'213'953.18 | 154'968.00 | -| generating large command graphs for N nodes - 4 | jacobi topology | 755'930.00 | 880'479.55 | 33'768.26 | -| generating large command graphs for N nodes - 16 | soup topology | 2'593'073.00 | 3'035'953.00 | 245'087.72 | -| generating large command graphs for N nodes - 16 | chain topology | 1'087'629.00 | 1'216'977.56 | 74'800.44 | -| generating large command graphs for N nodes - 16 | expanding tree topology | 985'154.00 | 1'103'570.42 | 78'286.65 | -| 
generating large command graphs for N nodes - 16 | contracting tree topology | 1'051'871.00 | 1'144'743.77 | 92'931.52 | -| generating large command graphs for N nodes - 16 | wave\_sim topology | 3'735'788.00 | 4'304'168.23 | 326'962.13 | -| generating large command graphs for N nodes - 16 | jacobi topology | 2'144'132.00 | 2'556'989.42 | 400'626.56 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | soup topology | 1'674'481.00 | 1'968'071.06 | 211'892.58 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | chain topology | 137'327.00 | 138'790.39 | 5'699.13 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | expanding tree topology | 187'011.00 | 190'111.19 | 8'880.90 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | contracting tree topology | 230'244.00 | 234'356.93 | 9'499.69 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | wave\_sim topology | 956'791.00 | 1'111'367.61 | 109'799.84 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | jacobi topology | 399'775.00 | 404'537.06 | 10'523.97 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | soup topology | 1'881'604.00 | 1'972'176.59 | 59'188.37 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | chain topology | 289'035.00 | 366'650.01 | 34'952.23 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | expanding 
tree topology | 431'274.00 | 483'929.77 | 37'888.71 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | contracting tree topology | 366'481.00 | 428'210.32 | 40'813.73 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | wave\_sim topology | 1'030'260.00 | 1'192'128.74 | 177'831.00 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | jacobi topology | 517'297.00 | 605'392.88 | 92'478.56 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 2'669'848.00 | 3'065'291.98 | 139'318.79 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 429'802.00 | 433'794.18 | 11'936.65 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 490'687.00 | 495'026.14 | 11'169.65 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 535'141.00 | 540'362.79 | 14'217.09 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 2'994'363.00 | 3'168'900.15 | 76'282.61 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 904'351.00 | 911'202.79 | 14'389.23 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 
10 us per task | soup topology | 2'748'998.00 | 3'012'953.55 | 66'238.57 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | chain topology | 491'258.00 | 535'949.51 | 26'321.60 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 553'957.00 | 618'978.36 | 25'280.45 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 605'395.00 | 648'933.38 | 28'509.80 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 3'183'903.00 | 3'294'001.55 | 107'426.11 | -| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 926'443.00 | 998'603.13 | 26'532.30 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | soup topology | 2'511'608.00 | 2'560'884.04 | 92'063.43 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | chain topology | 372'443.00 | 377'234.48 | 10'308.71 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | expanding tree topology | 464'107.00 | 470'129.31 | 11'413.89 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | contracting tree topology | 438'027.00 | 510'464.84 | 22'710.00 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | wave\_sim 
topology | 2'003'255.00 | 2'301'567.67 | 143'440.07 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | jacobi topology | 899'453.00 | 917'486.16 | 43'100.71 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | soup topology | 1'540'196.00 | 1'902'718.93 | 168'622.04 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | chain topology | 576'330.00 | 638'199.86 | 97'317.00 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | expanding tree topology | 745'972.00 | 894'178.79 | 115'224.64 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | contracting tree topology | 753'716.00 | 867'071.17 | 86'011.54 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | wave\_sim topology | 2'029'393.00 | 2'310'273.64 | 142'832.93 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | jacobi topology | 1'062'171.00 | 1'188'635.17 | 158'475.13 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 3'045'109.00 | 3'497'125.09 | 165'274.57 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 666'781.00 | 673'032.95 | 12'582.24 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 706'086.00 | 743'359.50 | 33'838.10 | -| building command 
graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 738'767.00 | 790'771.37 | 42'404.77 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 4'057'107.00 | 4'417'184.71 | 119'845.71 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 1'404'158.00 | 1'425'365.39 | 45'998.34 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'621'185.00 | 2'977'127.88 | 163'293.51 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | chain topology | 581'168.00 | 606'423.43 | 23'381.63 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 747'584.00 | 812'109.58 | 39'281.36 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 705'905.00 | 781'869.11 | 40'428.20 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 3'018'900.00 | 3'343'372.62 | 99'925.81 | -| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 1'116'203.00 | 1'230'274.46 | 187'567.99 | +| benchmark intrusive graph dependency handling with N nodes - 1 | creating nodes | 4.47 | 4.48 | 0.05 | +| benchmark intrusive graph dependency handling with N nodes 
- 1 | creating and adding dependencies | 20.77 | 22.31 | 3.79 | +| benchmark intrusive graph dependency handling with N nodes - 1 | adding and removing dependencies | 15.45 | 15.49 | 0.20 | +| benchmark intrusive graph dependency handling with N nodes - 1 | checking for dependencies | 1.69 | 1.70 | 0.01 | +| benchmark intrusive graph dependency handling with N nodes - 10 | creating nodes | 41.05 | 41.23 | 0.54 | +| benchmark intrusive graph dependency handling with N nodes - 10 | creating and adding dependencies | 248.25 | 249.65 | 4.34 | +| benchmark intrusive graph dependency handling with N nodes - 10 | adding and removing dependencies | 218.58 | 219.90 | 3.34 | +| benchmark intrusive graph dependency handling with N nodes - 10 | checking for dependencies | 24.02 | 24.45 | 0.35 | +| benchmark intrusive graph dependency handling with N nodes - 100 | creating nodes | 447.25 | 450.83 | 6.61 | +| benchmark intrusive graph dependency handling with N nodes - 100 | creating and adding dependencies | 4'172.67 | 4'197.84 | 25.19 | +| benchmark intrusive graph dependency handling with N nodes - 100 | adding and removing dependencies | 4'673.50 | 4'740.53 | 84.89 | +| benchmark intrusive graph dependency handling with N nodes - 100 | checking for dependencies | 1'909.62 | 1'919.19 | 4.73 | +| benchmark task handling > without access thread | generating and deleting tasks | 2'908'114.00 | 3'539'792.56 | 245'227.82 | +| benchmark task handling > with access thread | generating and deleting tasks with access thread | 7'490'145.00 | 8'288'538.37 | 353'791.74 | +| generating large task graphs | soup topology | 930'072.00 | 937'794.74 | 13'935.32 | +| generating large task graphs | chain topology | 31'898.00 | 32'179.30 | 682.05 | +| generating large task graphs | expanding tree topology | 57'216.00 | 57'955.07 | 757.21 | +| generating large task graphs | contracting tree topology | 82'183.00 | 83'252.53 | 1'163.77 | +| generating large task graphs | wave\_sim topology | 
303'494.00 | 342'686.28 | 13'026.07 | +| generating large task graphs | jacobi topology | 105'378.00 | 106'786.90 | 5'293.24 | +| generating large command graphs for N nodes - 1 | soup topology | 1'632'875.00 | 1'689'353.68 | 132'245.72 | +| generating large command graphs for N nodes - 1 | chain topology | 108'164.00 | 109'150.89 | 2'329.18 | +| generating large command graphs for N nodes - 1 | expanding tree topology | 179'679.00 | 183'438.96 | 7'827.36 | +| generating large command graphs for N nodes - 1 | contracting tree topology | 215'136.00 | 232'890.15 | 32'931.33 | +| generating large command graphs for N nodes - 1 | wave\_sim topology | 921'446.00 | 1'032'403.60 | 87'112.83 | +| generating large command graphs for N nodes - 1 | jacobi topology | 344'692.00 | 348'828.90 | 10'360.36 | +| generating large command graphs for N nodes - 4 | soup topology | 1'701'716.00 | 1'981'342.14 | 202'934.48 | +| generating large command graphs for N nodes - 4 | chain topology | 368'917.00 | 374'809.61 | 14'633.90 | +| generating large command graphs for N nodes - 4 | expanding tree topology | 440'694.00 | 454'699.08 | 32'652.37 | +| generating large command graphs for N nodes - 4 | contracting tree topology | 424'403.00 | 449'588.58 | 30'827.42 | +| generating large command graphs for N nodes - 4 | wave\_sim topology | 1'907'616.00 | 2'138'371.34 | 138'912.72 | +| generating large command graphs for N nodes - 4 | jacobi topology | 801'098.00 | 819'192.66 | 47'262.49 | +| generating large command graphs for N nodes - 16 | soup topology | 2'204'930.00 | 2'657'276.51 | 164'869.78 | +| generating large command graphs for N nodes - 16 | chain topology | 1'065'108.00 | 1'205'512.77 | 135'152.59 | +| generating large command graphs for N nodes - 16 | expanding tree topology | 936'053.00 | 1'098'224.64 | 50'667.87 | +| generating large command graphs for N nodes - 16 | contracting tree topology | 1'016'285.00 | 1'187'542.99 | 32'237.31 | +| generating large command graphs for N 
nodes - 16 | wave\_sim topology | 3'488'505.00 | 4'072'216.02 | 254'970.25 | +| generating large command graphs for N nodes - 16 | jacobi topology | 2'013'908.00 | 2'370'602.36 | 101'334.51 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | soup topology | 1'375'076.00 | 1'672'275.65 | 50'534.41 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | chain topology | 107'833.00 | 123'916.17 | 8'936.11 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | expanding tree topology | 182'875.00 | 185'404.86 | 1'975.20 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | contracting tree topology | 216'799.00 | 220'399.81 | 2'115.35 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | wave\_sim topology | 928'650.00 | 1'058'674.11 | 53'253.72 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: single-threaded immediate graph generation | jacobi topology | 349'521.00 | 352'429.43 | 2'282.00 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | soup topology | 1'084'906.00 | 1'282'751.91 | 135'906.07 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | chain topology | 291'430.00 | 331'976.11 | 24'368.73 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | expanding tree topology | 342'157.00 | 398'219.50 | 28'857.17 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a 
scheduler thread | contracting tree topology | 287'483.00 | 335'595.40 | 51'698.98 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | wave\_sim topology | 917'148.00 | 1'095'232.79 | 178'500.95 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > immediate submission to a scheduler thread | jacobi topology | 398'144.00 | 487'244.29 | 81'480.83 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 2'378'700.00 | 2'611'775.54 | 126'766.08 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 400'328.00 | 413'897.72 | 8'357.35 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 487'052.00 | 489'515.30 | 2'617.24 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 492'813.00 | 521'666.67 | 9'005.11 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 2'959'461.00 | 3'089'699.41 | 68'614.06 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 803'082.00 | 826'832.18 | 25'621.27 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'150'897.00 | 2'443'104.82 | 99'371.77 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled 
submission to a scheduler thread at 10 us per task | chain topology | 413'091.00 | 463'588.79 | 41'924.64 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 472'935.00 | 508'797.97 | 28'515.18 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 577'102.00 | 600'065.14 | 20'494.09 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 2'861'665.00 | 3'056'070.87 | 99'648.02 | +| building command graphs in a dedicated scheduler thread for N nodes - 1 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 780'198.00 | 869'813.79 | 56'456.72 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | soup topology | 1'718'107.00 | 1'949'065.24 | 167'863.59 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | chain topology | 377'965.00 | 381'401.18 | 2'473.20 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | expanding tree topology | 394'807.00 | 438'970.17 | 25'710.97 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | contracting tree topology | 491'651.00 | 495'691.44 | 2'622.53 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded immediate graph generation | wave\_sim topology | 1'932'734.00 | 2'197'912.64 | 120'132.53 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: single-threaded 
immediate graph generation | jacobi topology | 700'026.00 | 732'671.20 | 53'491.03 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | soup topology | 1'083'914.00 | 1'419'588.32 | 200'901.20 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | chain topology | 488'054.00 | 577'368.12 | 104'205.51 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | expanding tree topology | 625'615.00 | 722'413.86 | 116'245.51 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | contracting tree topology | 602'781.00 | 717'874.40 | 128'773.76 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | wave\_sim topology | 2'124'318.00 | 2'256'870.05 | 270'953.46 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > immediate submission to a scheduler thread | jacobi topology | 977'332.00 | 1'071'575.44 | 247'317.12 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | soup topology | 2'732'781.00 | 2'994'235.01 | 147'816.95 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | chain topology | 617'800.00 | 647'766.51 | 26'379.22 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | expanding tree topology | 753'588.00 | 758'476.20 | 7'051.86 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | contracting tree topology | 
733'880.00 | 796'035.10 | 13'798.97 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | wave\_sim topology | 3'958'395.00 | 4'202'760.45 | 146'182.78 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > reference: throttled single-threaded graph generation at 10 us per task | jacobi topology | 1'211'356.00 | 1'314'388.62 | 47'234.14 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | soup topology | 2'184'451.00 | 2'462'239.75 | 145'359.32 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | chain topology | 505'447.00 | 589'295.60 | 70'639.30 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | expanding tree topology | 637'948.00 | 737'075.67 | 111'095.34 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | contracting tree topology | 653'538.00 | 755'668.89 | 111'778.19 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | wave\_sim topology | 2'898'195.00 | 3'155'609.80 | 164'178.56 | +| building command graphs in a dedicated scheduler thread for N nodes - 4 > throttled submission to a scheduler thread at 10 us per task | jacobi topology | 955'570.00 | 1'059'364.76 | 142'363.14 | +| normalizing randomized box sets - 2d | small, native | 633.21 | 636.60 | 5.19 | +| normalizing randomized box sets - 2d | small, embedded in 3d | 727.10 | 732.69 | 18.98 | +| normalizing randomized box sets - 2d | medium, native | 6'692.25 | 6'953.20 | 387.69 | +| normalizing randomized box sets - 2d | medium, embedded in 
3d | 7'296.67 | 7'390.89 | 277.67 | +| normalizing randomized box sets - 2d | large, native | 265'832.00 | 267'138.98 | 1'081.56 | +| normalizing randomized box sets - 2d | large, embedded in 3d | 268'497.00 | 275'672.21 | 7'292.93 | +| normalizing randomized box sets - 3d | small - native | 3'136.88 | 3'168.28 | 66.11 | +| normalizing randomized box sets - 3d | medium - native | 11'120.33 | 11'216.22 | 298.87 | +| normalizing randomized box sets - 3d | large - native | 2'820'398.00 | 3'057'791.52 | 86'470.82 | +| normalizing a fully mergeable tiling of boxes - 1 | small, native | 36.28 | 36.99 | 0.29 | +| normalizing a fully mergeable tiling of boxes - 1 | small, embedded in 3d | 55.00 | 55.90 | 0.66 | +| normalizing a fully mergeable tiling of boxes - 1 | medium, native | 303.87 | 304.92 | 0.91 | +| normalizing a fully mergeable tiling of boxes - 1 | medium, embedded in 3d | 496.10 | 500.60 | 3.83 | +| normalizing a fully mergeable tiling of boxes - 1 | large, native | 8'936.33 | 8'995.55 | 109.11 | +| normalizing a fully mergeable tiling of boxes - 1 | large, embedded in 3d | 12'119.00 | 12'190.21 | 139.61 | +| normalizing a fully mergeable tiling of boxes - 2 | small, native | 115.96 | 116.78 | 1.30 | +| normalizing a fully mergeable tiling of boxes - 2 | small, embedded in 3d | 125.88 | 127.33 | 1.96 | +| normalizing a fully mergeable tiling of boxes - 2 | medium, native | 912.42 | 928.81 | 12.34 | +| normalizing a fully mergeable tiling of boxes - 2 | medium, embedded in 3d | 1'044.38 | 1'058.99 | 6.30 | +| normalizing a fully mergeable tiling of boxes - 2 | large, native | 36'928.00 | 37'417.90 | 593.97 | +| normalizing a fully mergeable tiling of boxes - 2 | large, embedded in 3d | 40'575.00 | 40'948.93 | 446.37 | +| normalizing a fully mergeable tiling of boxes - 3 | small, native | 260.15 | 262.95 | 3.68 | +| normalizing a fully mergeable tiling of boxes - 3 | medium, native | 1'558.44 | 1'577.92 | 10.03 | +| normalizing a fully mergeable tiling of boxes 
- 3 | large, native | 46'917.00 | 47'880.54 | 1'967.55 | +| performing set operations between randomized regions - 2d | union, small, native | 1'067.38 | 1'077.82 | 8.35 | +| performing set operations between randomized regions - 2d | union, small, embedded in 3d | 1'221.71 | 1'232.27 | 9.86 | +| performing set operations between randomized regions - 2d | intersection, small, native | 236.25 | 238.23 | 1.15 | +| performing set operations between randomized regions - 2d | intersection, small, embedded in 3d | 278.58 | 280.94 | 1.77 | +| performing set operations between randomized regions - 2d | difference, small, native | 1'026.46 | 1'038.96 | 9.80 | +| performing set operations between randomized regions - 2d | difference, small, embedded in 3d | 1'210.76 | 1'228.75 | 8.00 | +| performing set operations between randomized regions - 2d | union, medium, native | 14'797.00 | 14'946.57 | 306.21 | +| performing set operations between randomized regions - 2d | union, medium, embedded in 3d | 17'738.00 | 17'916.94 | 317.17 | +| performing set operations between randomized regions - 2d | intersection, medium, native | 2'368.30 | 2'435.91 | 56.39 | +| performing set operations between randomized regions - 2d | intersection, medium, embedded in 3d | 2'121.42 | 2'140.49 | 10.62 | +| performing set operations between randomized regions - 2d | difference, medium, native | 8'358.67 | 8'445.05 | 188.38 | +| performing set operations between randomized regions - 2d | difference, medium, embedded in 3d | 8'932.67 | 9'147.82 | 857.87 | +| performing set operations between randomized regions - 2d | union, large, native | 196'881.00 | 205'830.35 | 7'996.30 | +| performing set operations between randomized regions - 2d | union, large, embedded in 3d | 216'659.00 | 217'603.88 | 1'096.99 | +| performing set operations between randomized regions - 2d | intersection, large, native | 20'773.50 | 20'942.19 | 223.68 | +| performing set operations between randomized regions - 2d | 
intersection, large, embedded in 3d | 20'132.00 | 20'347.26 | 225.00 | +| performing set operations between randomized regions - 2d | difference, large, native | 583'575.00 | 636'077.87 | 23'237.67 | +| performing set operations between randomized regions - 2d | difference, large, embedded in 3d | 616'377.00 | 680'331.16 | 34'292.99 | +| performing set operations between randomized regions - 3d | union, small, native | 5'251.60 | 5'297.60 | 66.91 | +| performing set operations between randomized regions - 3d | intersection, small, native | 144.69 | 146.02 | 1.71 | +| performing set operations between randomized regions - 3d | difference, small, native | 1'191.11 | 1'199.88 | 32.38 | +| performing set operations between randomized regions - 3d | union, medium, native | 26'008.00 | 26'533.47 | 836.05 | +| performing set operations between randomized regions - 3d | intersection, medium, native | 2'337.00 | 2'357.78 | 30.55 | +| performing set operations between randomized regions - 3d | difference, medium, native | 12'042.33 | 12'202.06 | 195.23 | +| performing set operations between randomized regions - 3d | union, large, native | 2'786'142.00 | 2'966'247.77 | 116'648.15 | +| performing set operations between randomized regions - 3d | intersection, large, native | 13'429.50 | 13'554.32 | 82.26 | +| performing set operations between randomized regions - 3d | difference, large, native | 5'793'005.00 | 6'604'205.38 | 382'658.93 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | small, native | 2'558.00 | 2'788.78 | 168.16 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | small, embedded in 3d | 3'321.12 | 3'355.16 | 67.66 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | large, native | 5'240'768.00 | 5'429'455.36 | 91'892.88 | +| normalizing a fully mergeable, complex tiling of boxes - 2d | large, embedded in 3d | 5'413'365.00 | 5'635'666.39 | 104'470.51 | All numbers are in nanoseconds. 
diff --git a/include/accessor.h b/include/accessor.h index 98488d9a5..d510be8ac 100644 --- a/include/accessor.h +++ b/include/accessor.h @@ -186,9 +186,8 @@ class accessor : public detail::accessor_base // We currently don't support boundary checking for accessors created using accessor_testspy::make_device_accessor, // which does not set m_oob_indices. if(m_oob_indices != nullptr) { - const id all_true = detail::id_cast(id<3>(true, true, true)); - const bool is_within_bounds_lo = (index >= m_accessed_virtual_subrange.offset) == all_true; - const bool is_within_bounds_hi = (index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)) == all_true; + const bool is_within_bounds_lo = all_true(index >= m_accessed_virtual_subrange.offset); + const bool is_within_bounds_hi = all_true(index < (m_accessed_virtual_subrange.offset + m_accessed_virtual_subrange.range)); if((!is_within_bounds_lo || !is_within_bounds_hi)) { for(int d = 0; d < Dims; ++d) { sycl::atomic_ref{m_oob_indices[0][d]}.fetch_min(index[d]); diff --git a/include/buffer.h b/include/buffer.h index 38f933a40..ef5024daa 100644 --- a/include/buffer.h +++ b/include/buffer.h @@ -3,7 +3,6 @@ #include #include -#include #include "buffer_manager.h" #include "lifetime_extending_state.h" diff --git a/include/buffer_manager.h b/include/buffer_manager.h index 4ff9db73a..e5969caff 100644 --- a/include/buffer_manager.h +++ b/include/buffer_manager.h @@ -375,8 +375,8 @@ namespace detail { resize_info result; if(!is_inside_old_range) { result.resize_required = true; - result.new_offset = min_id(request_offset, buffer.offset); - result.new_range = range_cast<3>(id_cast<3>(max_range(old_abs_range, new_abs_range)) - result.new_offset); + result.new_offset = id_min(request_offset, buffer.offset); + result.new_range = range_cast<3>(id_cast<3>(range_max(old_abs_range, new_abs_range)) - result.new_offset); } return result; } diff --git a/include/buffer_storage.h b/include/buffer_storage.h index 
db621a8b7..8b20ed877 100644 --- a/include/buffer_storage.h +++ b/include/buffer_storage.h @@ -116,8 +116,8 @@ namespace detail { inline void assert_copy_is_in_range( const range<3>& source_range, const range<3>& target_range, const id<3>& source_offset, const id<3>& target_offset, const range<3>& copy_range) { - assert(max_range(source_range, range_cast<3>(source_offset + copy_range)) == source_range); - assert(max_range(target_range, range_cast<3>(target_offset + copy_range)) == target_range); + assert(range_max(source_range, range_cast<3>(source_offset + copy_range)) == source_range); + assert(range_max(target_range, range_cast<3>(target_offset + copy_range)) == target_range); } template diff --git a/include/buffer_transfer_manager.h b/include/buffer_transfer_manager.h index 474c7440b..b5dc309b5 100644 --- a/include/buffer_transfer_manager.h +++ b/include/buffer_transfer_manager.h @@ -63,23 +63,23 @@ namespace detail { struct incoming_transfer_handle : transfer_handle { incoming_transfer_handle(const size_t num_nodes) : m_num_nodes(num_nodes) {} - void set_expected_region(GridRegion<3> region) { m_expected_region = std::move(region); } + void set_expected_region(region<3> region) { m_expected_region = std::move(region); } void add_transfer(std::unique_ptr&& t) { assert(!complete); assert(t->frame->rid == 0 || m_is_reduction || m_transfers.empty()); // Either all or none m_is_reduction = t->frame->rid != 0; - const auto box = subrange_to_grid_box(t->frame->sr); - assert(GridRegion<3>::intersect(m_received_region, box).empty() || m_is_reduction); - assert(!m_expected_region.has_value() || GridRegion<3>::difference(box, *m_expected_region).empty()); - m_received_region = GridRegion<3>::merge(m_received_region, box); + const auto box = detail::box(t->frame->sr); + assert(region_intersection(m_received_region, box).empty() || m_is_reduction); + assert(!m_expected_region.has_value() || region_difference(box, *m_expected_region).empty()); + m_received_region = 
region_union(m_received_region, box); m_transfers.push_back(std::move(t)); } bool received_full_region() const { if(!m_expected_region.has_value()) return false; if(m_is_reduction) { - assert(m_expected_region->area() == 1); + assert(m_expected_region->get_area() == 1); // For reductions we're waiting to receive one message per peer return m_transfers.size() == m_num_nodes - 1; } @@ -99,8 +99,8 @@ namespace detail { size_t m_num_nodes; // Number of nodes in the system, required for reductions bool m_is_reduction = false; std::vector> m_transfers; - std::optional> m_expected_region; // This will only be set once the await push job has started - GridRegion<3> m_received_region; + std::optional> m_expected_region; // This will only be set once the await push job has started + region<3> m_received_region; }; struct transfer_out { diff --git a/include/command.h b/include/command.h index 312091a49..ea1ebd10e 100644 --- a/include/command.h +++ b/include/command.h @@ -70,14 +70,14 @@ namespace detail { class await_push_command final : public abstract_command { friend class command_graph; - await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, GridRegion<3> region) + await_push_command(command_id cid, buffer_id bid, reduction_id rid, transfer_id trid, region<3> region) : abstract_command(cid), m_bid(bid), m_rid(rid), m_trid(trid), m_region(std::move(region)) {} public: buffer_id get_bid() const { return m_bid; } reduction_id get_reduction_id() const { return m_rid; } transfer_id get_transfer_id() const { return m_trid; } - GridRegion<3> get_region() const { return m_region; } + region<3> get_region() const { return m_region; } private: buffer_id m_bid; @@ -85,7 +85,7 @@ namespace detail { // but it allows us to sanity check that they match as well as include the ID during graph printing. 
reduction_id m_rid; transfer_id m_trid; - GridRegion<3> m_region; + region<3> m_region; }; class reduction_command final : public abstract_command { @@ -184,7 +184,7 @@ namespace detail { buffer_id bid; reduction_id rid; transfer_id trid; - GridRegion<3> region; + region<3> region; }; struct reduction_data { diff --git a/include/distributed_graph_generator.h b/include/distributed_graph_generator.h index f28d54cf8..bbe74641f 100644 --- a/include/distributed_graph_generator.h +++ b/include/distributed_graph_generator.h @@ -100,7 +100,7 @@ class distributed_graph_generator { void generate_distributed_commands(const task& tsk); void generate_anti_dependencies( - task_id tid, buffer_id bid, const region_map& last_writers_map, const GridRegion<3>& write_req, abstract_command* write_cmd); + task_id tid, buffer_id bid, const region_map& last_writers_map, const region<3>& write_req, abstract_command* write_cmd); void process_task_side_effect_requirements(const task& tsk); @@ -117,7 +117,7 @@ class distributed_graph_generator { void prune_commands_before(const command_id epoch); private: - using buffer_read_map = std::unordered_map>; + using buffer_read_map = std::unordered_map>; using side_effect_map = std::unordered_map; size_t m_num_nodes; diff --git a/include/fence.h b/include/fence.h index 5c3e48259..626dbcbe6 100644 --- a/include/fence.h +++ b/include/fence.h @@ -93,7 +93,7 @@ class buffer_fence_promise final : public detail::fence_promise { void fulfill() override { const auto access_info = runtime::get_instance().get_buffer_manager().access_host_buffer(get_buffer_id(m_buffer), access_mode::read, m_subrange); - assert((id_cast(access_info.backing_buffer_offset) <= m_subrange.offset) == id_cast(id<3>(true, true, true))); + assert(all_true(id_cast(access_info.backing_buffer_offset) <= m_subrange.offset)); auto data = std::make_unique(m_subrange.range.size()); memcpy_strided_host(access_info.ptr, data.get(), sizeof(DataT), range_cast(access_info.backing_buffer_range), 
m_subrange.offset - id_cast(access_info.backing_buffer_offset), m_subrange.range, {}, m_subrange.range); diff --git a/include/grid.h b/include/grid.h index 1802ccb0d..b6662ed7c 100644 --- a/include/grid.h +++ b/include/grid.h @@ -1,54 +1,333 @@ #pragma once -#include -#include -#undef assert_fail // Incompatible with fmt +#include +#include +#include +#include #include "ranges.h" +#include "workaround.h" -namespace celerity { -namespace detail { +namespace celerity::detail { - using namespace allscale::api::user::data; +template +class box; - inline GridPoint<1> id_to_grid_point(id<1> id) { return GridPoint<1>(id[0]); } +template +class region; - inline GridPoint<2> id_to_grid_point(id<2> id) { return GridPoint<2>(id[0], id[1]); } +} // namespace celerity::detail - inline GridPoint<3> id_to_grid_point(id<3> id) { return GridPoint<3>(id[0], id[1], id[2]); } +namespace celerity::detail::grid_detail { - // The AllScale classes use a different template type for dimensions (size_t), which can lead to some type inference issues. - // We thus have to provide all instantiations explicitly as overloads below. - namespace impl { +struct normalized_t { +} inline constexpr normalized; - template - GridBox subrange_to_grid_box(const subrange& sr) { - return GridBox(id_to_grid_point(sr.offset), id_to_grid_point(sr.offset + sr.range)); +struct non_empty_t { +} inline constexpr non_empty; + +template +box make_box(Params&&... args) { + return box(std::forward(args)...); +} + +template +region make_region(Params&&... args) { + return region(std::forward(args)...); +} + +template +int get_min_dimensions(const InputIterator first, const InputIterator last) { + return std::accumulate(first, last, 0, [](const int min_dims, const auto& box) { return std::max(min_dims, box.get_min_dimensions()); }); +} + +} // namespace celerity::detail::grid_detail + +namespace celerity::detail { + +/// An arbitrary-dimensional box described by its minimum and maximum points. 
+template +class box /* class instead of struct: enforces min <= max invariant */ { + public: + static_assert(Dims >= 0); + static constexpr int dimensions = Dims; + + /// Construct an empty box for Dims > 0, and a unit-sized box for Dims == 0 + box() = default; + + /// Construct a box from two points where `min` must be less or equal to `max` in every dimension. + /// Empty boxes are normalized to [0,0,0] - [0,0,0], meaning that every box-shaped set of points has a unique representation. + box(const id& min, const id& max) { + bool non_empty = true; + for(int d = 0; d < Dims; ++d) { + // Ideally all coordinates would be signed types, but since id and range must be unsigned to conform with SYCL, we trap size_t overflows and + // incorrect casts from negative integers in user code in this assertion. + CELERITY_DETAIL_ASSERT_ON_HOST(std::max(min[d], max[d]) < std::numeric_limits::max() / 2 && "potential integer overflow detected"); + // Precondition: + CELERITY_DETAIL_ASSERT_ON_HOST(min[d] <= max[d]); + non_empty &= min[d] < max[d]; } + m_min = non_empty ? min : id{}; + m_max = non_empty ? 
max : id{}; + } - template - subrange grid_box_to_subrange(const GridBox& box) { - const auto& box_min = box.get_min(); - const auto& box_max = box.get_max(); - id min; - id max; - for(int i = 0; i < Dims; ++i) { - min[i] = box_min[i]; - max[i] = box_max[i]; - } - return subrange{min, range_cast(max - min)}; + box(const subrange& other) : box(other.offset, other.offset + other.range) { +#if CELERITY_DETAIL_ENABLE_DEBUG + for(int d = 0; d < Dims; ++d) { + CELERITY_DETAIL_ASSERT_ON_HOST(other.range[d] < std::numeric_limits::max() - other.offset[d]); + } +#endif + } + + bool empty() const { + if constexpr(Dims > 0) { + return m_max[0] == 0; // empty boxes are normalized to [0,0,0] - [0,0,0] + } else { + return false; // edge case: min == max, but 0-dimensional boxes are always size 1 + } + } + + const id& get_min() const { return m_min; } + const id& get_max() const { return m_max; } + + const id& get_offset() const { return m_min; } + range get_range() const { return range_cast(m_max - m_min); } + subrange get_subrange() const { return {get_offset(), get_range()}; } + operator subrange() const { return get_subrange(); } + + /// Counts the number of points covered by the region. + size_t get_area() const { return get_range().size(); } + + /// Returns the smallest dimensionality that `*this` can be `box_cast` to. 
+ int get_min_dimensions() const { + if(empty()) return 1; // edge case: a 0-dimensional box is always non-empty + for(int dims = Dims; dims > 0; --dims) { + if(m_max[dims - 1] > 1) { return dims; } + } + return 0; + } + + bool covers(const box& other) const { + for(int d = 0; d < Dims; ++d) { + if(other.m_min[d] < m_min[d]) return false; + if(other.m_max[d] > m_max[d]) return false; + } + return true; + } + + friend bool operator==(const box& lhs, const box& rhs) { return lhs.m_min == rhs.m_min && lhs.m_max == rhs.m_max; } + friend bool operator!=(const box& lhs, const box& rhs) { return !operator==(lhs, rhs); } + + private: + template + friend box grid_detail::make_box(P&&... args); + + id m_min; + id m_max; + + // fast code path for grid algorithms that does not attempt to normalize empty boxes + box(grid_detail::non_empty_t /* tag */, const id& min, const id& max) : m_min(min), m_max(max) { +#if CELERITY_DETAIL_ENABLE_DEBUG + for(int d = 0; d < Dims; ++d) { + CELERITY_DETAIL_ASSERT_ON_HOST(min[d] < max[d]); + } +#endif + } +}; + +/// Boxes can be cast between dimensionalities as long as no information is lost (i.e. a cast to a higher dimensionality is always round-trip safe). 
+template +box box_cast(const box& in) { + CELERITY_DETAIL_ASSERT_ON_HOST(in.get_min_dimensions() <= DimsOut); + return box(subrange_cast(in.get_subrange())); // cast through subrange to fill missing range dimensions with 1s +} + +template +box bounding_box(const box& box1, const box& box2) { + const auto min = id_min(box1.get_min(), box2.get_min()); + const auto max = id_max(box1.get_max(), box2.get_max()); + return box(min, max); +} + +template +auto bounding_box(InputIterator first, const InputIterator last) { + using box_type = typename std::iterator_traits::value_type; + if(first == last) { + assert(box_type::dimensions > 0); // box<0> can never be empty + return box_type(); + } + + const auto init = *first; + return std::accumulate(++first, last, init, bounding_box); +} + +template +auto bounding_box(const Range& range) { + using std::begin, std::end; + return bounding_box(begin(range), end(range)); +} + +template +box box_intersection(const box& box1, const box& box2) { + const auto min = id_max(box1.get_min(), box2.get_min()); + const auto max = id_min(box1.get_max(), box2.get_max()); + for(int d = 0; d < Dims; ++d) { + if(min[d] >= max[d]) return {}; + } + return {min, max}; +} + +/// Comparison operator (similar to std::less) that orders boxes by their minimum, then their maximum, both starting with the first ("slowest") dimension. +/// This ordering is somewhat arbitrary but allows equality comparisons between ordered sequences of boxes (i.e., regions) +struct box_coordinate_order { + template + bool operator()(const box& lhs, const box& rhs) const { + for(int d = 0; d < Dims; ++d) { + if(lhs.get_min()[d] < rhs.get_min()[d]) return true; + if(lhs.get_min()[d] > rhs.get_min()[d]) return false; + } + for(int d = 0; d < Dims; ++d) { + if(lhs.get_max()[d] < rhs.get_max()[d]) return true; + if(lhs.get_max()[d] > rhs.get_max()[d]) return false; } + return false; + } +}; + +/// An arbitrary-dimensional set of points described by a normalized tiling of boxes. 
+template +class region { + public: + constexpr static int dimensions = Dims; + using box = detail::box; + + region() = default; + region(const box& single_box); + region(const subrange& single_sr); + + /// Constructs a region by normalizing an arbitrary, potentially-overlapping tiling of boxes. + explicit region(std::vector&& boxes); + + const std::vector& get_boxes() const& { return m_boxes; } + + std::vector into_boxes() && { return std::move(m_boxes); } + + bool empty() const { return m_boxes.empty(); } + + /// Counts the number of points covered by the region. + size_t get_area() const { + return std::accumulate(m_boxes.begin(), m_boxes.end(), size_t{0}, [](const size_t area, const box& box) { return area + box.get_area(); }); + } + + /// Returns the smallest dimensionality that `*this` can be `region_cast` to. + int get_min_dimensions() const { return grid_detail::get_min_dimensions(m_boxes.begin(), m_boxes.end()); } + + friend bool operator==(const region& lhs, const region& rhs) { return lhs.m_boxes == rhs.m_boxes; } + friend bool operator!=(const region& lhs, const region& rhs) { return !(lhs == rhs); } + + private: + template + friend region grid_detail::make_region(P&&... 
args); + + std::vector m_boxes; + + region(grid_detail::normalized_t, std::vector&& boxes); +}; + +} // namespace celerity::detail + +namespace celerity::detail::grid_detail { + +template +std::vector> boxes_cast(const std::vector>& in) { + assert(get_min_dimensions(in.begin(), in.end()) <= DimsOut); + std::vector> out(in.size()); + std::transform(in.begin(), in.end(), out.begin(), [](const box& box) { return box_cast(box); }); + return out; +} + +// forward-declaration for tests (explicitly instantiated) +template +void dissect_box(const box& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); + +// forward-declaration for tests (explicitly instantiated) +template +BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator first, const BidirectionalIterator last); + +// forward-declaration for tests (explicitly instantiated) +template +void normalize(std::vector>& boxes); + +// rvalue shortcut for normalize(lvalue) +template +std::vector>&& normalize(std::vector>&& boxes) { + normalize(boxes); + return std::move(boxes); +} + +} // namespace celerity::detail::grid_detail + +namespace celerity::detail { + +template +region region_cast(const region& in) { + assert(in.get_min_dimensions() <= DimsOut); + // a normalized region will remain normalized after the cast + return grid_detail::make_region(grid_detail::normalized, grid_detail::boxes_cast(in.get_boxes())); +} + +template +box bounding_box(const region& region) { + return bounding_box(region.get_boxes().begin(), region.get_boxes().end()); +} + +template +region region_union(const region& lhs, const region& rhs); + +template +region region_union(const region& lhs, const box& rhs) { + return region_union(lhs, region(rhs)); +} + +template +region region_union(const box& lhs, const region& rhs) { + return region_union(region(lhs), rhs); +} + +template +region region_union(const box& lhs, const box& rhs) { + return region(std::vector{lhs, rhs}); +} + +template +region 
region_intersection(const region& lhs, const region& rhs); + +template +region region_intersection(const region& lhs, const box& rhs) { + return region_intersection(lhs, region(rhs)); +} + +template +region region_intersection(const box& lhs, const region& rhs) { + return region_intersection(region(lhs), rhs); +} - } // namespace impl +template +region region_difference(const region& lhs, const region& rhs); +template +region region_difference(const region& lhs, const box& rhs) { + return region_difference(lhs, region(rhs)); +} - inline GridBox<1> subrange_to_grid_box(const subrange<1>& sr) { return impl::subrange_to_grid_box<1>(sr); } - inline GridBox<2> subrange_to_grid_box(const subrange<2>& sr) { return impl::subrange_to_grid_box<2>(sr); } - inline GridBox<3> subrange_to_grid_box(const subrange<3>& sr) { return impl::subrange_to_grid_box<3>(sr); } +template +region region_difference(const box& lhs, const region& rhs) { + return region_difference(region(lhs), rhs); +} - inline subrange<1> grid_box_to_subrange(const GridBox<1>& box) { return impl::grid_box_to_subrange<1>(box); } - inline subrange<2> grid_box_to_subrange(const GridBox<2>& box) { return impl::grid_box_to_subrange<2>(box); } - inline subrange<3> grid_box_to_subrange(const GridBox<3>& box) { return impl::grid_box_to_subrange<3>(box); } +template +region region_difference(const box& lhs, const box& rhs) { + return region_difference(region(lhs), region(rhs)); +} -} // namespace detail -} // namespace celerity +} // namespace celerity::detail diff --git a/include/print_utils.h b/include/print_utils.h index a25a2c564..7e4c6cf9b 100644 --- a/include/print_utils.h +++ b/include/print_utils.h @@ -1,22 +1,69 @@ #pragma once +#include "grid.h" #include "ranges.h" -namespace celerity { +#include -namespace detail { - std::ostream& print_chunk3(std::ostream& os, chunk<3> chnk3); - std::ostream& print_subrange3(std::ostream& os, subrange<3> subr3); -} // namespace detail +template +struct fmt::formatter> : 
fmt::formatter { + format_context::iterator format(const Interface& coord, format_context& ctx) const { + auto out = ctx.out(); + *out++ = '['; + for(int d = 0; d < Dims; ++d) { + if(d != 0) *out++ = ','; + out = formatter::format(coord[d], ctx); + } + *out++ = ']'; + return out; + } +}; template -std::ostream& operator<<(std::ostream& os, chunk chnk) { - return detail::print_chunk3(os, detail::chunk_cast<3>(chnk)); -} +struct fmt::formatter> : fmt::formatter, Dims>> {}; template -std::ostream& operator<<(std::ostream& os, subrange subr) { - return detail::print_subrange3(os, detail::subrange_cast<3>(subr)); -} +struct fmt::formatter> : fmt::formatter, Dims>> {}; -} // namespace celerity +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::detail::box& box, format_context& ctx) const { + auto out = ctx.out(); + out = formatter>::format(box.get_min(), ctx); + out = std::copy_n(" - ", 3, out); + out = formatter>::format(box.get_max(), ctx); + return out; + } +}; + +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::detail::region& region, format_context& ctx) const { + auto out = ctx.out(); + *out++ = '{'; + for(size_t i = 0; i < region.get_boxes().size(); ++i) { + if(i != 0) out = std::copy_n(", ", 2, out); + out = formatter>::format(region.get_boxes()[i], ctx); + } + *out++ = '}'; + return out; + } +}; + +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::subrange& sr, format_context& ctx) const { + return fmt::formatter>::format(celerity::detail::box(sr), ctx); + } +}; + +template +struct fmt::formatter> : fmt::formatter> { + format_context::iterator format(const celerity::chunk& chunk, format_context& ctx) const { + auto out = ctx.out(); + out = fmt::formatter>::format(celerity::subrange(chunk.offset, chunk.range), ctx); + out = std::copy_n(" : ", 3, out); + out = 
formatter>::format(celerity::id(chunk.global_size), ctx); // cast to id to avoid multiple inheritance + return out; + } +}; diff --git a/include/ranges.h b/include/ranges.h index b0c0ed84d..22c41d546 100644 --- a/include/ranges.h +++ b/include/ranges.h @@ -26,8 +26,16 @@ struct make_from_t { // and would otherwise be prohibited by strict-aliasing rules (because two identical pointers with the same type must point to the same object). template struct coordinate_storage { - constexpr size_t operator[](int dimension) const { return values[dimension]; } - constexpr size_t& operator[](int dimension) { return values[dimension]; } + constexpr size_t operator[](int dimension) const { + CELERITY_DETAIL_ASSERT_ON_HOST(dimension < Dims); + return values[dimension]; + } + + constexpr size_t& operator[](int dimension) { + CELERITY_DETAIL_ASSERT_ON_HOST(dimension < Dims); + return values[dimension]; + } + size_t values[Dims] = {}; }; @@ -59,7 +67,7 @@ class coordinate { template )>> constexpr coordinate(const size_t dim_0, const Values... 
dim_n) : m_values{{dim_0, static_cast(dim_n)...}} {} - constexpr size_t get(int dimension) { return m_values[dimension]; } + constexpr size_t get(int dimension) const { return m_values[dimension]; } constexpr size_t& operator[](int dimension) { return m_values[dimension]; } constexpr size_t operator[](int dimension) const { return m_values[dimension]; } @@ -208,18 +216,19 @@ class coordinate { CELERITY_DETAIL_NO_UNIQUE_ADDRESS coordinate_storage m_values; }; -template -InterfaceOut coordinate_cast(const coordinate& in) { +template +InterfaceOut coordinate_cast(const InterfaceIn& in) { + CELERITY_DETAIL_ASSERT_ON_HOST(in.get_min_dimensions() <= InterfaceOut::dimensions); return InterfaceOut(make_from, in); } -template -range range_cast(const coordinate& in) { +template +range range_cast(const InterfaceIn& in) { return coordinate_cast>(in); } -template -id id_cast(const coordinate& in) { +template +id id_cast(const InterfaceIn& in) { return coordinate_cast>(in); } @@ -278,11 +287,19 @@ class range : public detail::coordinate, Dims> { } } + /// Returns the smallest dimensionality that `*this` can be `range_cast` to. + int get_min_dimensions() const { + for(int dims = Dims; dims > 0; --dims) { + if((*this)[dims - 1] > 1) return dims; + } + return 0; + } + private: friend class detail::coordinate, Dims>; - template - friend InterfaceOut detail::coordinate_cast(const detail::coordinate& in); + template + friend InterfaceOut detail::coordinate_cast(const InterfaceIn& in); template > constexpr range() noexcept {} @@ -336,9 +353,17 @@ class id : public detail::coordinate, Dims> { } } + /// Returns the smallest dimensionality that `*this` can be `id_cast` to. 
+ int get_min_dimensions() const { + for(int dims = Dims; dims > 0; --dims) { + if((*this)[dims - 1] > 0) { return dims; } + } + return 0; + } + private: - template - friend InterfaceOut detail::coordinate_cast(const detail::coordinate& in); + template + friend InterfaceOut detail::coordinate_cast(const InterfaceIn& in); template constexpr id(const detail::make_from_t /* tag */, const detail::coordinate& in) @@ -429,9 +454,7 @@ namespace detail { public: subscript_proxy(Target& tgt, const id id) : m_tgt(tgt), m_id(id) {} - inline decltype(auto) operator[](const size_t index) const { - return subscript(m_tgt, m_id, index); - } + inline decltype(auto) operator[](const size_t index) const { return subscript(m_tgt, m_id, index); } private: Target& m_tgt; @@ -446,9 +469,9 @@ namespace detail { inline size_t get_linear_index(const range<3>& range, const id<3>& index) { return index[0] * range[1] * range[2] + index[1] * range[2] + index[2]; } -#define MAKE_COMPONENT_WISE_BINARY_FN(name, range_type, op) \ +#define CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(name, coord, op) \ template \ - range_type name(const range_type& a, const range_type& b) { \ + coord name(const coord& a, const coord& b) { \ auto result = a; \ for(int d = 0; d < Dims; ++d) { \ result[d] = op(result[d], b[d]); \ @@ -456,12 +479,21 @@ namespace detail { return result; \ } - MAKE_COMPONENT_WISE_BINARY_FN(min_range, range, std::min) - MAKE_COMPONENT_WISE_BINARY_FN(max_range, range, std::max) - MAKE_COMPONENT_WISE_BINARY_FN(min_id, id, std::min) - MAKE_COMPONENT_WISE_BINARY_FN(max_id, id, std::max) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(range_min, range, std::min) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(range_max, range, std::max) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(id_min, id, std::min) + CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN(id_max, id, std::max) + +#undef CELERITY_DETAIL_MAKE_COMPONENT_WISE_FN -#undef MAKE_COMPONENT_WISE_BINARY_FN + template + bool all_true(const coordinate &bools) { + for(int d = 
0; d < Dims; ++d) { + CELERITY_DETAIL_ASSERT_ON_HOST(bools[d] == 0 || bools[d] == 1); + if(bools[d] == 0) return false; + } + return true; + } } // namespace detail @@ -478,6 +510,9 @@ struct chunk { chunk(const id& offset, const celerity::range& range, const celerity::range& global_size) : offset(offset), range(range), global_size(global_size) {} + /// Returns the smallest dimensionality that `*this` can be `chunk_cast` to. + int get_min_dimensions() const { return std::max({offset.get_min_dimensions(), range.get_min_dimensions(), global_size.get_min_dimensions()}); } + friend bool operator==(const chunk& lhs, const chunk& rhs) { return lhs.offset == rhs.offset && lhs.range == rhs.range && lhs.global_size == rhs.global_size; } @@ -492,11 +527,12 @@ struct subrange { CELERITY_DETAIL_NO_UNIQUE_ADDRESS celerity::range range = detail::zero; subrange() = default; - subrange(const id& offset, const celerity::range& range) : offset(offset), range(range) {} - subrange(const chunk& other) : offset(other.offset), range(other.range) {} + /// Returns the smallest dimensionality that `*this` can be `subrange_cast` to. 
+ int get_min_dimensions() const { return std::max({offset.get_min_dimensions(), range.get_min_dimensions()}); } + friend bool operator==(const subrange& lhs, const subrange& rhs) { return lhs.offset == rhs.offset && lhs.range == rhs.range; } friend bool operator!=(const subrange& lhs, const subrange& rhs) { return !operator==(lhs, rhs); } }; @@ -505,11 +541,13 @@ namespace detail { template chunk chunk_cast(const chunk& other) { + CELERITY_DETAIL_ASSERT_ON_HOST(other.get_min_dimensions() <= Dims); return chunk{detail::id_cast(other.offset), detail::range_cast(other.range), detail::range_cast(other.global_size)}; } template subrange subrange_cast(const subrange& other) { + CELERITY_DETAIL_ASSERT_ON_HOST(other.get_min_dimensions() <= Dims); return subrange{detail::id_cast(other.offset), detail::range_cast(other.range)}; } diff --git a/include/region_map.h b/include/region_map.h index 5ee1c798f..2ee9facaa 100644 --- a/include/region_map.h +++ b/include/region_map.h @@ -37,7 +37,7 @@ namespace region_map_detail { constexpr size_t min_children = 2; template - bool is_lo_inside(const GridBox& a, const GridBox& b) { + bool is_lo_inside(const box& a, const box& b) { static_assert(D < Dims); const auto a_min = a.get_min(); if(a_min[D] <= b.get_min()[D]) return false; @@ -46,7 +46,7 @@ namespace region_map_detail { } template - bool is_hi_inside(const GridBox& a, const GridBox& b) { + bool is_hi_inside(const box& a, const box& b) { static_assert(D < Dims); const auto a_max = a.get_max(); if(a_max[D] <= b.get_min()[D]) return false; @@ -54,8 +54,8 @@ namespace region_map_detail { return true; } - template - GridBox compute_bounding_box(const GridBox& a, const GridBox& b) { + template + box compute_bounding_box(const box& a, const box& b) { const auto min_a = a.get_min(); const auto min_b = b.get_min(); const auto max_a = a.get_max(); @@ -69,13 +69,13 @@ namespace region_map_detail { return {new_min, new_max}; } - template - bool do_overlap(const GridBox& a, const GridBox& b) 
{ - return a.intersectsWith(b); + template + bool do_overlap(const box& a, const box& b) { + return !box_intersection(a, b).empty(); } - template - bool is_inside(const GridBox& box, const GridPoint& point) { + template + bool is_inside(const box& box, const id& point) { auto box_min = box.get_min(); auto box_max = box.get_max(); bool inside = true; @@ -86,17 +86,6 @@ namespace region_map_detail { return inside; } - template - GridBox box_cast(const GridBox& other) { - GridPoint min; - GridPoint max; - for(size_t o = 0; o < DimsOut; ++o) { - min[o] = o < DimsIn ? other.get_min()[o] : 0; - max[o] = o < DimsIn ? other.get_max()[o] : 1; - } - return GridBox(min, max); - } - /** * Check that the region map's tree structure is in a good state: * - Root bounding box is equal to extent @@ -111,7 +100,7 @@ namespace region_map_detail { rm.m_root->sanity_check_bounding_boxes(); size_t max_depth = 0; - std::queue, const typename RegionMap::types::inner_node_type*>> node_queue; + std::queue, const typename RegionMap::types::inner_node_type*>> node_queue; node_queue.push(std::make_pair(rm.m_root->get_bounding_box(), rm.m_root.get())); while(!node_queue.empty()) { @@ -138,13 +127,13 @@ namespace region_map_detail { #endif } - template + template class inner_node; /** * Convenience types shared by inner_node and region_map_impl. 
*/ - template + template class region_map_types { public: static_assert(Dims <= 3); @@ -152,30 +141,30 @@ namespace region_map_detail { using inner_node_type = inner_node; using unique_inner_node_ptr = std::unique_ptr; using inner_node_child_type = std::variant; - using entry = std::pair, ValueType>; + using entry = std::pair, ValueType>; struct insert_node_action { - GridBox box; + box box; ValueType value; bool processed_locally = false; }; struct erase_node_action { - GridBox box; + box box; bool processed_locally = false; }; using update_action = std::variant; - using orphan = std::pair, inner_node_child_type>; + using orphan = std::pair, inner_node_child_type>; struct insert_result { unique_inner_node_ptr spilled_node; // This should always be the same as spilled_node->get_bounding_box (TODO: assert?) - GridBox spilled_box; + box spilled_box; }; }; - template + template class inner_node { friend struct celerity::detail::region_map_testspy; @@ -232,7 +221,7 @@ namespace region_map_detail { * @param actions The list of erase and insert actions required to create a hole for the new entry. * @returns True if a localized update operation was performed that may require a bounding box recomputation. */ - bool update_box(const GridBox& box, const ValueType& value, std::vector& actions) { + bool update_box(const box& box, const ValueType& value, std::vector& actions) { if(!m_contains_leaves) { bool any_child_did_local_update = false; for(size_t i = 0; i < m_child_boxes.size(); ++i) { @@ -269,21 +258,31 @@ namespace region_map_detail { // Partial overlap. Check in each dimension which sides of the box intersect with the current box, creating new boxes along the way. // TODO PERF: A split may not even be necessary, if the value remains the same. Is this something worth optimizing for? 
- GridBox remainder = child_box; + detail::box remainder = child_box; const auto& child_value = get_child_value(i); const auto split_along = [&](const auto dim) { if(is_lo_inside(box, child_box)) { - auto new_box = remainder; - new_box.get_max()[dim.value] = box.get_min()[dim.value]; - remainder.get_min()[dim.value] = box.get_min()[dim.value]; + auto new_box_max = remainder.get_max(); + new_box_max[dim.value] = box.get_min()[dim.value]; + const auto new_box = detail::box(remainder.get_min(), new_box_max); + + auto new_remainder_min = remainder.get_min(); + new_remainder_min[dim.value] = box.get_min()[dim.value]; + remainder = detail::box(new_remainder_min, remainder.get_max()); + actions.push_back(typename types::insert_node_action{new_box, child_value}); } if(is_hi_inside(box, child_box)) { - auto new_box = remainder; - new_box.get_min()[dim.value] = box.get_max()[dim.value]; - remainder.get_max()[dim.value] = box.get_max()[dim.value]; + auto new_box_min = remainder.get_min(); + new_box_min[dim.value] = box.get_max()[dim.value]; + const auto new_box = detail::box(new_box_min, remainder.get_max()); + + auto new_remainder_max = remainder.get_max(); + new_remainder_max[dim.value] = box.get_max()[dim.value]; + remainder = detail::box(remainder.get_min(), new_remainder_max); + actions.push_back(typename types::insert_node_action{new_box, child_value}); } }; @@ -362,14 +361,14 @@ namespace region_map_detail { * * TODO: Structurally very similar to insert_subtree - can we DRY up? */ - std::optional insert(const GridBox& box, const ValueType& value) { + std::optional insert(const box& box, const ValueType& value) { if(!m_contains_leaves) { // Value belongs deeper into the tree. Find child that best fits it. 
// TODO PERF: Resolve ties in area increase according to [Guttman 1984] size_t best_i = std::numeric_limits::max(); size_t smallest_area_delta = std::numeric_limits::max(); for(size_t i = 0; i < m_child_boxes.size(); ++i) { - const auto area_delta = compute_bounding_box(m_child_boxes[i], box).area() - m_child_boxes[i].area(); + const auto area_delta = compute_bounding_box(m_child_boxes[i], box).get_area() - m_child_boxes[i].get_area(); if(area_delta < smallest_area_delta) { smallest_area_delta = area_delta; best_i = i; @@ -433,8 +432,8 @@ namespace region_map_detail { // Greedily assign all values to groups, O(N^2) auto bbox1 = m_child_boxes[seed1]; auto bbox2 = m_child_boxes[seed2]; - auto area1 = bbox1.area(); - auto area2 = bbox2.area(); + auto area1 = bbox1.get_area(); + auto area2 = bbox2.get_area(); std::vector assigned(m_children.size(), false); assigned[seed1] = true; assigned[seed2] = true; @@ -442,7 +441,7 @@ namespace region_map_detail { while(num_assigned < m_children.size()) { size_t smallest_area_delta = std::numeric_limits::max(); size_t smallest_i = std::numeric_limits::max(); - GridBox smallest_bbox; + detail::box smallest_bbox; size_t smallest_area = 0; size_t target_node = 0; @@ -451,8 +450,8 @@ namespace region_map_detail { const auto new_bbox1 = compute_bounding_box(m_child_boxes[i], bbox1); const auto new_bbox2 = compute_bounding_box(m_child_boxes[i], bbox2); - const auto new_area1 = new_bbox1.area(); - const auto new_area2 = new_bbox2.area(); + const auto new_area1 = new_bbox1.get_area(); + const auto new_area2 = new_bbox2.get_area(); const auto ad1 = (new_area1 - area1); const auto ad2 = (new_area2 - area2); @@ -514,7 +513,7 @@ namespace region_map_detail { * * TODO: Structurally very similar to insert - can we DRY up? 
*/ - std::optional insert_subtree(const GridBox& box, std::unique_ptr>&& subtree) { + std::optional insert_subtree(const box& box, std::unique_ptr>&& subtree) { assert(!m_contains_leaves); assert(subtree->m_depth > m_depth); @@ -525,7 +524,7 @@ namespace region_map_detail { size_t best_i = std::numeric_limits::max(); size_t smallest_area_delta = std::numeric_limits::max(); for(size_t i = 0; i < m_child_boxes.size(); ++i) { - const auto area_delta = compute_bounding_box(m_child_boxes[i], box).area() - m_child_boxes[i].area(); + const auto area_delta = compute_bounding_box(m_child_boxes[i], box).get_area() - m_child_boxes[i].get_area(); if(area_delta < smallest_area_delta) { smallest_area_delta = area_delta; best_i = i; @@ -576,7 +575,7 @@ namespace region_map_detail { const auto new_bbox2 = compute_bounding_box(bbox2, m_child_boxes[i]); // Assign value to node that results in smaller area increase. - if((new_bbox1.area() - bbox1.area()) < (new_bbox2.area() - bbox2.area())) { + if((new_bbox1.get_area() - bbox1.get_area()) < (new_bbox2.get_area() - bbox2.get_area())) { node1->insert_child_node(m_child_boxes[i], std::move(std::get(m_children[i]))); bbox1 = new_bbox1; } else { @@ -607,7 +606,7 @@ namespace region_map_detail { * @param orphans A list of entries or subtrees that were orphaned due to dissolving a node. * @returns True if the box was erased in this subtree. */ - bool erase(const GridBox& box, std::vector& orphans) { + bool erase(const box& box, std::vector& orphans) { bool did_erase = false; if(!m_contains_leaves) { @@ -649,7 +648,7 @@ namespace region_map_detail { /** * Recursively finds all entries that intersect with box. 
*/ - void query(const GridBox& box, std::vector& intersecting) const { + void query(const box& box, std::vector& intersecting) const { if(!m_contains_leaves) { for(size_t i = 0; i < m_children.size(); ++i) { if(do_overlap(m_child_boxes[i], box)) { get_child_node(i).query(box, intersecting); } @@ -664,7 +663,7 @@ namespace region_map_detail { /** * Returns the entry containing a given point, if such an entry exists. */ - std::optional point_query(const GridPoint& point) const { + std::optional point_query(const id& point) const { for(size_t i = 0; i < m_children.size(); ++i) { if(is_inside(m_child_boxes[i], point)) { if(!m_contains_leaves) { @@ -688,16 +687,16 @@ namespace region_map_detail { } // NOTE: Not O(1)! - GridBox get_bounding_box() const { + box get_bounding_box() const { assert(!m_child_boxes.empty()); - GridBox bbox = m_child_boxes[0]; + box bbox = m_child_boxes[0]; for(size_t i = 1; i < m_child_boxes.size(); ++i) { bbox = compute_bounding_box(bbox, m_child_boxes[i]); } return bbox; } - void insert_child_node(const GridBox& box, std::unique_ptr&& node) { + void insert_child_node(const box& box, std::unique_ptr&& node) { assert(m_children.size() < max_children + 1); // During splits we temporarily go one above the max m_child_boxes.push_back(box); m_children.emplace_back(std::move(node)); @@ -746,7 +745,7 @@ namespace region_map_detail { bool m_contains_leaves; // TODO PERF: Consider storing these in small vectors - std::vector> m_child_boxes; + std::vector> m_child_boxes; std::vector m_children; inner_node& get_child_node(size_t index) { return *std::get(m_children[index]); } @@ -755,12 +754,12 @@ namespace region_map_detail { ValueType& get_child_value(size_t index) { return std::get(m_children[index]); } const ValueType& get_child_value(size_t index) const { return std::get(m_children[index]); } - void insert_child_value(const GridBox& box, const ValueType& value) { + void insert_child_value(const box& box, const ValueType& value) { 
assert(m_children.size() < max_children + 1); // During splits we temporarily go one above the max #if !defined(NDEBUG) for(auto& b : m_child_boxes) { // New box must not overlap with any other - assert(GridRegion::intersect(b, box).empty()); + assert(box_intersection(b, box).empty()); } #endif m_child_boxes.push_back(box); @@ -779,7 +778,7 @@ namespace region_map_detail { size_t worst_j = std::numeric_limits::max(); for(size_t i = 0; i < m_child_boxes.size(); ++i) { for(size_t j = i + 1; j < m_child_boxes.size(); ++j) { - const auto area = compute_bounding_box(m_child_boxes[i], m_child_boxes[j]).area(); + const auto area = compute_bounding_box(m_child_boxes[i], m_child_boxes[j]).get_area(); if(area > worst_area) { worst_area = area; worst_i = i; @@ -794,14 +793,14 @@ namespace region_map_detail { bool is_underfull() const { return m_children.size() < min_children; } - GridBox sanity_check_bounding_boxes() const { + box sanity_check_bounding_boxes() const { #if !defined(NDEBUG) - // After an erase this node might not have any children. Return empty box in that case. - if(m_child_boxes.empty()) { return box_cast(GridBox<3>({0, 0, 0}, {0, 0, 0})); } + // After an erase this node might not have any children. Return empty box in that case. TODO this breaks for Dims == 0 (where area is always 1)! + if(m_child_boxes.empty()) { return box_cast(box<3>({0, 0, 0}, {0, 0, 0})); } - GridBox result = m_child_boxes[0]; + box result = m_child_boxes[0]; for(size_t i = 1; i < m_child_boxes.size(); ++i) { - const GridBox child_box = m_contains_leaves ? m_child_boxes[i] : get_child_node(i).sanity_check_bounding_boxes(); + const box child_box = m_contains_leaves ? 
m_child_boxes[i] : get_child_node(i).sanity_check_bounding_boxes(); assert(m_child_boxes[i] == child_box); result = compute_bounding_box(result, child_box); } @@ -811,24 +810,17 @@ namespace region_map_detail { } }; - inline void assert_dimensionality(const GridBox<3>& box, const int dims) { + inline void assert_dimensionality(const box<3>& box, const int dims) { #if !defined(NDEBUG) - const auto& min = box.get_min(); - const auto& max = box.get_max(); - if(dims < 3) { - assert(min[2] == 0); - assert(max[2] == 1); - } - if(dims == 1) { - assert(min[1] == 0); - assert(max[1] == 1); - } + assert(box.get_min_dimensions() <= dims); #endif } - inline void assert_dimensionality(const GridRegion<3>& reg, const int dims) { + inline void assert_dimensionality(const region<3>& reg, const int dims) { #if !defined(NDEBUG) - reg.scanByBoxes([&](const GridBox<3>& box) { assert_dimensionality(box, dims); }); + for(const auto& box : reg.get_boxes()) { + assert_dimensionality(box, dims); + } #endif } @@ -844,7 +836,7 @@ namespace region_map_detail { * TODO PERF: Try to minimize the number of value copies we do during intermediate steps (e.g. when merging) * TODO PERF: Look into bulk-loading algorithms for updating multiple boxes at once */ - template + template class region_map_impl { friend struct celerity::detail::region_map_testspy; using types = region_map_types; @@ -854,8 +846,7 @@ namespace region_map_detail { static constexpr size_t dimensions = Dims; region_map_impl(const range& extent, ValueType default_value = ValueType{}) - : m_extent(subrange_to_grid_box(subrange{id_cast(id<3>{0, 0, 0}), extent})), - m_root(std::make_unique(true, 0)) { + : m_extent(subrange({}, extent)), m_root(std::make_unique(true, 0)) { m_root->insert(this->m_extent, default_value); } @@ -879,10 +870,10 @@ namespace region_map_detail { * 3) Attempt to merge the box as well as any other newly created boxes * with their surrounding entries. 
*/ - void update_box(const GridBox& box, const ValueType& value) { + void update_box(const box& box, const ValueType& value) { assert(m_root != nullptr && "Moved from?"); - const auto clamped_box = GridBox::intersect(m_extent, box); + const auto clamped_box = box_intersection(m_extent, box); // This can happen e.g. for empty buffers, or if the box is // completely outside the region map's extent for some reason. @@ -904,18 +895,18 @@ namespace region_map_detail { #if !defined(NDEBUG) // Sanity check: Erased and inserted boxes must cover the same space - GridRegion erased; - GridRegion inserted; + region erased; + region inserted; for(const auto& a : m_update_actions) { utils::match( a, [&](const typename types::erase_node_action& erase_action) { - assert(GridRegion::intersect(erased, erase_action.box).empty()); - erased = GridRegion::merge(erased, erase_action.box); + assert(region_intersection(erased, erase_action.box).empty()); + erased = region_union(erased, erase_action.box); }, [&](const typename types::insert_node_action& insert_action) { - assert(GridRegion::intersect(inserted, insert_action.box).empty()); - inserted = GridRegion::merge(inserted, insert_action.box); + assert(region_intersection(inserted, insert_action.box).empty()); + inserted = region_union(inserted, insert_action.box); }); } assert(erased == inserted); @@ -968,7 +959,7 @@ namespace region_map_detail { * * TODO PERF: In most cases we are unlikely to store the returned values, and the copy is unnecessary. Return const reference instead? 
*/ - std::vector get_region_values(const GridBox& request) const { + std::vector get_region_values(const box& request) const { assert(m_root != nullptr && "Moved from?"); m_query_results_raw.clear(); @@ -993,7 +984,7 @@ namespace region_map_detail { clamped_min[d] = std::max(v_min[d], r_min[d]); clamped_max[d] = std::min(v_max[d], r_max[d]); } - m_query_results_clamped.push_back(std::make_pair(GridBox{clamped_min, clamped_max}, v)); + m_query_results_clamped.push_back(std::make_pair(box{clamped_min, clamped_max}, v)); } #else std::swap(m_query_results_raw, m_query_results_clamped); @@ -1043,7 +1034,7 @@ namespace region_map_detail { return m_root->format_to(out, 0); } - range get_extent() const { return grid_box_to_subrange(m_extent).range; } + range get_extent() const { return m_extent.get_range(); } private: template @@ -1051,7 +1042,7 @@ namespace region_map_detail { // The extent specifies the boundaries for the region map to which all entries are clamped, // and which initially contains the default value. Currently always starts at [0,0,0]. - GridBox m_extent; + box m_extent; std::unique_ptr m_root; @@ -1069,7 +1060,7 @@ namespace region_map_detail { * Inserts a new entry into the tree. * Precondition: The insert location must be empty. */ - void insert(const GridBox& box, const ValueType& value) { + void insert(const box& box, const ValueType& value) { auto ret = m_root->insert(box, value); if(ret.has_value()) { reroot(std::move(*ret)); } } @@ -1077,7 +1068,7 @@ namespace region_map_detail { /** * Inserts a subtree (either from a dissolved parent or after a split) into the tree. */ - void insert_subtree(const GridBox& box, typename types::unique_inner_node_ptr&& subtree) { + void insert_subtree(const box& box, typename types::unique_inner_node_ptr&& subtree) { auto ret = m_root->insert_subtree(box, std::move(subtree)); if(ret.has_value()) { reroot(std::move(*ret)); } } @@ -1099,7 +1090,7 @@ namespace region_map_detail { * Erases a box from the tree. 
If the parent box becomes underfull it is dissolved and its children * are reinserted. */ - void erase(const GridBox& box) { + void erase(const box& box) { m_erase_orphans.clear(); [[maybe_unused]] const auto did_erase = m_root->erase(box, m_erase_orphans); assert(did_erase); @@ -1122,7 +1113,7 @@ namespace region_map_detail { * Calculates whether two boxes can be merged. In order to be mergeable, the two boxes * have to touch in one dimension and match exactly in all remaining dimensions. */ - bool can_merge(const GridBox& box_a, const GridBox& box_b) const { + bool can_merge(const box& box_a, const box& box_b) const { bool adjacent = false; for(size_t d = 0; d < Dims; ++d) { if(box_a.get_min()[d] != box_b.get_min()[d] || box_a.get_max()[d] != box_b.get_max()[d]) { @@ -1146,10 +1137,10 @@ namespace region_map_detail { void try_merge(std::vector&& merge_candidates) { #if !defined(NDEBUG) // Sanity check: Merge candidates do not overlap - GridRegion candidate_union; + region candidate_union; for(auto& [box, value] : merge_candidates) { - assert(GridRegion::intersect(candidate_union, box).empty()); - candidate_union = GridRegion::merge(candidate_union, box); + assert(region_intersection(candidate_union, box).empty()); + candidate_union = region_union(candidate_union, box); } #endif @@ -1170,7 +1161,7 @@ namespace region_map_detail { for(size_t d = 0; d < Dims; ++d) { const auto min = box.get_min(); const auto max = box.get_max(); - std::optional> other_box; + std::optional> other_box; if(min[d] > 0) { auto probe = min; probe[d] -= 1; @@ -1236,9 +1227,9 @@ namespace region_map_detail { public: region_map_impl(const range<0>& /* extent */, ValueType default_value) : m_value(default_value) {} - void update_box(const GridBox<1>& /* box */, const ValueType& value) { m_value = value; } + void update_box(const box<1>& /* box */, const ValueType& value) { m_value = value; } - std::vector, ValueType>> get_region_values(const GridBox<1>& /* request */) const { return 
{{GridBox<1>{0, 1}, m_value}}; } + std::vector, ValueType>> get_region_values(const box<1>& /* request */) const { return {{box<1>{0, 1}, m_value}}; } template void apply_to_values(const Functor& f) { @@ -1267,7 +1258,7 @@ class region_map { */ region_map(range<3> extent, int dims, ValueType default_value = ValueType{}) : m_dims(dims) { using namespace region_map_detail; - assert_dimensionality(subrange_to_grid_box(subrange<3>{id<3>{}, extent}), dims); + assert_dimensionality(box<3>(subrange<3>{id<3>{}, extent}), dims); switch(m_dims) { case 0: m_region_map.template emplace>(range_cast<0>(extent), default_value); break; case 1: m_region_map.template emplace>(range_cast<1>(extent), default_value); break; @@ -1280,15 +1271,17 @@ class region_map { /** * Sets a new value for the provided region within the region map. */ - void update_region(const GridRegion<3>& region, const ValueType& value) { + void update_region(const region<3>& region, const ValueType& value) { region_map_detail::assert_dimensionality(region, m_dims); - region.scanByBoxes([&](const GridBox<3>& box) { update_box(box, value); }); + for(const auto& box : region.get_boxes()) { + update_box(box, value); + } } /** * Sets a new value for the provided box within the region map. */ - void update_box(const GridBox<3>& box, const ValueType& value) { + void update_box(const box<3>& box, const ValueType& value) { using namespace region_map_detail; switch(m_dims) { case 0: get_map<0>().update_box(box_cast<1>(box), value); break; @@ -1304,13 +1297,13 @@ class region_map { * * @returns A list of boxes clamped to the request region, and their associated values. 
*/ - std::vector, ValueType>> get_region_values(const GridRegion<3>& request) const { + std::vector, ValueType>> get_region_values(const region<3>& request) const { region_map_detail::assert_dimensionality(request, m_dims); - std::vector, ValueType>> results; - request.scanByBoxes([&](const GridBox<3>& box) { + std::vector, ValueType>> results; + for(const auto& box : request.get_boxes()) { const auto r = get_region_values(box); results.insert(results.begin(), r.cbegin(), r.cend()); - }); + } return results; } @@ -1319,9 +1312,9 @@ class region_map { * * @returns A list of boxes clamped to the request box, and their associated values. */ - std::vector, ValueType>> get_region_values(const GridBox<3>& request) const { + std::vector, ValueType>> get_region_values(const box<3>& request) const { using namespace region_map_detail; - std::vector, ValueType>> results; + std::vector, ValueType>> results; switch(m_dims) { // TODO: AllScale box doesn't support 0 dimensions, fall back to 1 case 0: { diff --git a/include/task.h b/include/task.h index 1b4aa9fbe..e69b1d719 100644 --- a/include/task.h +++ b/include/task.h @@ -104,10 +104,10 @@ namespace detail { * * @returns The region obtained by merging the results of all range-mappers for this buffer and mode */ - GridRegion<3> get_mode_requirements( + region<3> get_mode_requirements( const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const; - GridBox<3> get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const; + box<3> get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const; private: std::vector>> m_accesses; diff --git a/include/workaround.h b/include/workaround.h index eccc5eb89..5ef8fd861 100644 --- a/include/workaround.h +++ b/include/workaround.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ 
-41,3 +43,9 @@ #define CELERITY_DETAIL_HAS_NO_UNIQUE_ADDRESS false #define CELERITY_DETAIL_NO_UNIQUE_ADDRESS #endif + +#if CELERITY_DETAIL_ENABLE_DEBUG && !defined(__SYCL_DEVICE_ONLY__) +#define CELERITY_DETAIL_ASSERT_ON_HOST(...) assert(__VA_ARGS__) +#else +#define CELERITY_DETAIL_ASSERT_ON_HOST(...) +#endif diff --git a/src/buffer_manager.cc b/src/buffer_manager.cc index 10d60dae0..e61b1c183 100644 --- a/src/buffer_manager.cc +++ b/src/buffer_manager.cc @@ -33,7 +33,7 @@ namespace detail { void buffer_manager::get_buffer_data(buffer_id bid, const subrange<3>& sr, void* out_linearized) { std::unique_lock lock(m_mutex); assert(m_buffers.count(bid) == 1 && (m_buffers.at(bid).device_buf.is_allocated() || m_buffers.at(bid).host_buf.is_allocated())); - auto data_locations = m_newest_data_location.at(bid).get_region_values(subrange_to_grid_box(sr)); + auto data_locations = m_newest_data_location.at(bid).get_region_values(region(sr)); // Slow path: We need to obtain current data from both host and device. if(data_locations.size() > 1) { @@ -50,13 +50,13 @@ namespace detail { } existing_buf = make_buffer_subrange_coherent(bid, access_mode::read, std::move(existing_buf), sr, std::move(replacement_buf)); - data_locations = {{subrange_to_grid_box(sr), data_location::host}}; + data_locations = {{box(sr), data_location::host}}; } // get_buffer_data will race with pending transfers for the same subrange. In case there are pending transfers and a host buffer does not exist yet, // these transfers cannot easily be flushed here as creating a host buffer requires a templated context that knows about DataT. 
assert(std::none_of(m_scheduled_transfers[bid].begin(), m_scheduled_transfers[bid].end(), - [&](const transfer& t) { return subrange_to_grid_box(sr).intersectsWith(subrange_to_grid_box(t.sr)); })); + [&](const transfer& t) { return !box_intersection(box(sr), box(t.sr)).empty(); })); if(data_locations[0].second == data_location::host || data_locations[0].second == data_location::host_and_device) { return m_buffers.at(bid).host_buf.storage->get_data({m_buffers.at(bid).host_buf.get_local_offset(sr.offset), sr.range}, out_linearized); @@ -73,7 +73,7 @@ namespace detail { buffer_manager::access_info buffer_manager::access_device_buffer(buffer_id bid, access_mode mode, const subrange<3>& sr) { std::unique_lock lock(m_mutex); - assert((range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range) == range<3>(true, true, true)); + assert(all_true(range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range)); auto& existing_buf = m_buffers[bid].device_buf; backing_buffer replacement_buf; @@ -130,14 +130,11 @@ namespace detail { // Use faux host accesses to retain all data from the device (except what is going to be discarded anyway). // TODO: This could be made more efficient, currently it may cause multiple consecutive resizes. 
- GridRegion<3> retain_region = subrange_to_grid_box(subrange<3>{existing_buf.offset, existing_buf.storage->get_range()}); - if(!access::mode_traits::is_consumer(mode)) { - retain_region = GridRegion<3>::difference(retain_region, subrange_to_grid_box(subrange<3>{sr.offset, sr.range})); + region retain_region(subrange(existing_buf.offset, existing_buf.storage->get_range())); + if(!access::mode_traits::is_consumer(mode)) { retain_region = region_difference(retain_region, region(sr)); } + for(const subrange<3> sr : retain_region.get_boxes()) { + access_host_buffer_impl(bid, access_mode::read, sr); } - retain_region.scanByBoxes([&](const GridBox<3>& box) { - const auto sr = grid_box_to_subrange(box); - access_host_buffer_impl(bid, access_mode::read, subrange<3>{sr.offset, sr.range}); - }); // We now have all data "backed up" on the host, so we may deallocate the device buffer (via destructor). existing_buf = backing_buffer{}; @@ -173,7 +170,7 @@ namespace detail { } buffer_manager::access_info buffer_manager::access_host_buffer_impl(const buffer_id bid, const access_mode mode, const subrange<3>& sr) { - assert((range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range) == range<3>(true, true, true)); + assert(all_true(range_cast<3>(sr.offset + sr.range) <= m_buffer_infos.at(bid).range)); auto& existing_buf = m_buffers[bid].host_buf; backing_buffer replacement_buf; @@ -242,24 +239,21 @@ namespace detail { const auto target_buffer_location = target_buffer.storage->get_type() == buffer_type::host_buffer ? data_location::host : data_location::device; - const auto coherent_box = subrange_to_grid_box(coherent_sr); + const auto coherent_box = box(coherent_sr); // If a previous buffer is provided, we may have to retain some or all of the existing data. 
- const GridRegion<3> retain_region = ([&]() { - GridRegion<3> result = coherent_box; - if(previous_buffer.is_allocated()) { - result = GridRegion<3>::merge(result, subrange_to_grid_box({previous_buffer.offset, previous_buffer.storage->get_range()})); - } - return result; + const region<3> retain_region = ([&]() { + std::vector> boxes{coherent_box}; + if(previous_buffer.is_allocated()) { boxes.push_back(subrange(previous_buffer.offset, previous_buffer.storage->get_range())); } + return region(std::move(boxes)); })(); // IIFE // Sanity check: Retain region must be at least as large as coherence box (and fully overlap). - assert(coherent_box.area() <= retain_region.area()); - assert(GridRegion<3>::difference(coherent_box, retain_region).empty()); + assert(coherent_box.get_area() <= retain_region.get_area()); + assert(region_difference(coherent_box, retain_region).empty()); // Also check that the new target buffer could actually fit the entire retain region. - assert((grid_box_to_subrange(retain_region.boundingBox()).offset >= target_buffer.offset) == id(true, true, true)); - assert((grid_box_to_subrange(retain_region.boundingBox()).offset + grid_box_to_subrange(retain_region.boundingBox()).range - <= target_buffer.offset + target_buffer.storage->get_range()) + assert((bounding_box(retain_region).get_offset() >= target_buffer.offset) == id(true, true, true)); + assert((bounding_box(retain_region).get_offset() + bounding_box(retain_region).get_range() <= target_buffer.offset + target_buffer.storage->get_range()) == id(true, true, true)); // Check whether we have any scheduled transfers that overlap with the requested subrange, and if so, apply them. 
@@ -271,15 +265,15 @@ namespace detail { if(detail::access::mode_traits::is_consumer(mode)) #endif { - GridRegion<3> updated_region; + std::vector> updated_region_boxes; std::vector remaining_transfers; auto& scheduled_buffer_transfers = m_scheduled_transfers[bid]; remaining_transfers.reserve(scheduled_buffer_transfers.size() / 2); for(auto& t : scheduled_buffer_transfers) { - auto t_region = subrange_to_grid_box(t.sr); + auto t_box = box(t.sr); // Check whether this transfer applies to the current request. - auto t_minus_coherent_region = GridRegion<3>::difference(t_region, coherent_box); + auto t_minus_coherent_region = region_difference(t_box, coherent_box); if(!t_minus_coherent_region.empty()) { // Check if transfer applies partially. // This might happen in certain situations, when two different commands partially overlap in their required buffer ranges. @@ -289,19 +283,19 @@ namespace detail { // NOTE: We currently assume that one of the requests will consume the FULL transfer. Only then we discard it. // This assumption is valid right now, as the graph generator will not consolidate adjacent pushes for two (or more) // separate commands. This might however change in the future. - if(t_minus_coherent_region != t_region) { + if(t_minus_coherent_region != t_box) { assert(detail::access::mode_traits::is_consumer(mode)); - auto intersection = GridRegion<3>::intersect(t_region, coherent_box); - remaining_region_after_transfers = GridRegion<3>::difference(remaining_region_after_transfers, intersection); + auto intersection = region(box_intersection(t_box, coherent_box)); // TODO this can be a box instead of a region! 
+ remaining_region_after_transfers = region_difference(remaining_region_after_transfers, intersection); const auto element_size = m_buffer_infos.at(bid).element_size; - intersection.scanByBoxes([&](const GridBox<3>& box) { - auto sr = grid_box_to_subrange(box); + for(const auto& box : intersection.get_boxes()) { + auto sr = box.get_subrange(); // TODO can this temp buffer be avoided? auto tmp = make_uninitialized_payload(sr.range.size() * element_size); linearize_subrange(t.linearized.get_pointer(), tmp.get_pointer(), element_size, t.sr.range, {sr.offset - t.sr.offset, sr.range}); target_buffer.storage->set_data({target_buffer.get_local_offset(sr.offset), sr.range}, tmp.get_pointer()); - updated_region = GridRegion<3>::merge(updated_region, box); - }); + updated_region_boxes.push_back(box); + } } // Transfer only applies partially, or not at all - which means we have to keep it around. remaining_transfers.emplace_back(std::move(t)); @@ -310,35 +304,35 @@ namespace detail { // Transfer applies fully. assert(detail::access::mode_traits::is_consumer(mode)); - remaining_region_after_transfers = GridRegion<3>::difference(remaining_region_after_transfers, t_region); + remaining_region_after_transfers = region_difference(remaining_region_after_transfers, t_box); target_buffer.storage->set_data({target_buffer.get_local_offset(t.sr.offset), t.sr.range}, t.linearized.get_pointer()); - updated_region = GridRegion<3>::merge(updated_region, t_region); + updated_region_boxes.push_back(t_box); } // The target buffer now has the newest data in this region. 
- m_newest_data_location.at(bid).update_region(updated_region, target_buffer_location); + m_newest_data_location.at(bid).update_region(region(std::move(updated_region_boxes)), target_buffer_location); scheduled_buffer_transfers = std::move(remaining_transfers); } if(!remaining_region_after_transfers.empty()) { - const auto maybe_retain_box = [&](const GridBox<3>& box) { + const auto maybe_retain_box = [&](const box<3>& box) { if(detail::access::mode_traits::is_consumer(mode)) { // If we are accessing the buffer using a consumer mode, we have to retain the full previous contents, otherwise... - const auto box_sr = grid_box_to_subrange(box); + const auto box_sr = box.get_subrange(); target_buffer.storage->copy( *previous_buffer.storage, previous_buffer.get_local_offset(box_sr.offset), target_buffer.get_local_offset(box_sr.offset), box_sr.range); } else { // ...check if there are parts of the previous buffer that we are not going to overwrite (and thus have to retain). // If so, copy only those parts. 
- const auto remaining_region = GridRegion<3>::difference(box, coherent_box); - remaining_region.scanByBoxes([&](const GridBox<3>& small_box) { - const auto small_box_sr = grid_box_to_subrange(small_box); + const auto remaining_region = region_difference(box, coherent_box); + for(const auto& small_box : remaining_region.get_boxes()) { + const auto small_box_sr = small_box.get_subrange(); target_buffer.storage->copy(*previous_buffer.storage, previous_buffer.get_local_offset(small_box_sr.offset), target_buffer.get_local_offset(small_box_sr.offset), small_box_sr.range); - }); + } } }; - GridRegion<3> replicated_region; + std::vector> replicated_boxes; auto& buffer_data_locations = m_newest_data_location.at(bid); const auto data_locations = buffer_data_locations.get_region_values(remaining_region_after_transfers); for(auto& dl : data_locations) { @@ -354,21 +348,21 @@ namespace detail { // Copy from host, unless we are using a pure producer mode else if(dl.second == data_location::host && detail::access::mode_traits::is_consumer(mode)) { assert(m_buffers[bid].host_buf.is_allocated()); - const auto box_sr = grid_box_to_subrange(dl.first); + const auto box_sr = dl.first.get_subrange(); const auto& host_buf = m_buffers[bid].host_buf; target_buffer.storage->copy( *host_buf.storage, host_buf.get_local_offset(box_sr.offset), target_buffer.get_local_offset(box_sr.offset), box_sr.range); - replicated_region = GridRegion<3>::merge(replicated_region, dl.first); + replicated_boxes.push_back(dl.first); } } else if(target_buffer.storage->get_type() == buffer_type::host_buffer) { // Copy from device, unless we are using a pure producer mode if(dl.second == data_location::device && detail::access::mode_traits::is_consumer(mode)) { assert(m_buffers[bid].device_buf.is_allocated()); - const auto box_sr = grid_box_to_subrange(dl.first); + const auto box_sr = dl.first.get_subrange(); const auto& device_buf = m_buffers[bid].device_buf; target_buffer.storage->copy( *device_buf.storage, 
device_buf.get_local_offset(box_sr.offset), target_buffer.get_local_offset(box_sr.offset), box_sr.range); - replicated_region = GridRegion<3>::merge(replicated_region, dl.first); + replicated_boxes.push_back(dl.first); } // Copy from host in case we are resizing an existing buffer else if((dl.second == data_location::host || dl.second == data_location::host_and_device) && previous_buffer.is_allocated()) { @@ -378,7 +372,7 @@ namespace detail { } // Finally, remember the fact that we replicated some regions to the new target location. - buffer_data_locations.update_region(replicated_region, data_location::host_and_device); + buffer_data_locations.update_region(region(std::move(replicated_boxes)), data_location::host_and_device); } if(detail::access::mode_traits::is_producer(mode)) { m_newest_data_location.at(bid).update_region(coherent_box, target_buffer_location); } diff --git a/src/buffer_transfer_manager.cc b/src/buffer_transfer_manager.cc index 9278c2336..c0f7fddc7 100644 --- a/src/buffer_transfer_manager.cc +++ b/src/buffer_transfer_manager.cc @@ -64,7 +64,7 @@ namespace detail { assert(pkg.get_command_type() == command_type::await_push); const auto& data = std::get(pkg.data); - GridRegion<3> expected_region = data.region; + const auto &expected_region = data.region; std::shared_ptr t_handle; // Check to see if we have (fully) received the data already diff --git a/src/distributed_graph_generator.cc b/src/distributed_graph_generator.cc index 7ed12e3c2..1b15e31c6 100644 --- a/src/distributed_graph_generator.cc +++ b/src/distributed_graph_generator.cc @@ -30,8 +30,8 @@ void distributed_graph_generator::add_buffer(const buffer_id bid, const int dims std::piecewise_construct, std::tuple{bid}, std::tuple{region_map{range, dims}, region_map{range, dims}}); // Mark contents as available locally (= don't generate await push commands) and fully replicated (= don't generate push commands). // This is required when tasks access host-initialized or uninitialized buffers. 
- m_buffer_states.at(bid).local_last_writer.update_region(subrange_to_grid_box({id<3>(), range}), m_epoch_for_new_commands); - m_buffer_states.at(bid).replicated_regions.update_region(subrange_to_grid_box({id<3>(), range}), node_bitset{}.set()); + m_buffer_states.at(bid).local_last_writer.update_region(subrange<3>({}, range), m_epoch_for_new_commands); + m_buffer_states.at(bid).replicated_regions.update_region(subrange<3>({}, range), node_bitset{}.set()); } // We simply split in the first dimension for now @@ -81,7 +81,7 @@ static std::vector> split_equal(const chunk<3>& full_chunk, const range return result; } -using buffer_requirements_map = std::unordered_map>>; +using buffer_requirements_map = std::unordered_map>>; static buffer_requirements_map get_buffer_requirements_for_mapped_access(const task& tsk, subrange<3> sr, const range<3> global_size) { buffer_requirements_map result; @@ -152,7 +152,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(tsk.get_type() == task_type::collective || tsk.get_type() == task_type::fence) { std::vector> chunks; for(size_t nid = 0; nid < m_num_nodes; ++nid) { - chunks.push_back(chunk_cast<3>(chunk<1>{id<1>{nid}, one, {m_num_nodes}})); + chunks.push_back(chunk_cast<3>(chunk<1>{id<1>{tsk.get_type() == task_type::collective ? nid : 0}, one, {m_num_nodes}})); } return chunks; } @@ -169,9 +169,9 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) const auto chunks_per_node = std::max(1, chunks.size() / m_num_nodes); // Union of all per-buffer writes on this node, used to determine which parts of a buffer are fresh/stale later on. - std::unordered_map> per_buffer_local_writes; + std::unordered_map> per_buffer_local_writes; // In case we need to push a region that is overwritten in the same task, we have to defer updating the last writer. 
- std::unordered_map, command_id>>> per_buffer_last_writer_update_list; + std::unordered_map, command_id>>> per_buffer_last_writer_update_list; // Buffers that currently are in a pending reduction state will receive a new buffer state after a reduction has been generated. std::unordered_map post_reduction_buffer_states; @@ -213,7 +213,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) assert(requirements[reduction.bid].count(pmode) == 0); // task_manager verifies that there are no reduction <-> write-access conflicts } #endif - requirements[reduction.bid][rmode] = GridRegion<3>{{1, 1, 1}}; + requirements[reduction.bid][rmode] = box<3>({0, 0, 0}, {1, 1, 1}); } abstract_command* cmd = nullptr; @@ -285,22 +285,24 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(detail::access::mode_traits::is_consumer(mode)) { if(is_local_chunk) { // Store the read access for determining anti-dependencies later on - m_command_buffer_reads[cmd->get_cid()][bid] = GridRegion<3>::merge(m_command_buffer_reads[cmd->get_cid()][bid], req); + m_command_buffer_reads[cmd->get_cid()][bid] = region_union(m_command_buffer_reads[cmd->get_cid()][bid], req); } if(is_local_chunk && !is_pending_reduction) { const auto local_sources = buffer_state.local_last_writer.get_region_values(req); - GridRegion<3> missing_parts; + std::vector> missing_part_boxes; for(const auto& [box, wcs] : local_sources) { + if(box.empty()) continue; if(!wcs.is_fresh()) { - missing_parts = GridRegion<3>::merge(missing_parts, box); + missing_part_boxes.push_back(box); continue; } m_cdag.add_dependency(cmd, m_cdag.get(wcs), dependency_kind::true_dep, dependency_origin::dataflow); } // There is data we don't yet have locally. Generate an await push command for it. 
- if(!missing_parts.empty()) { + if(!missing_part_boxes.empty()) { + const region missing_parts(std::move(missing_part_boxes)); assert(m_num_nodes > 1); auto* const ap_cmd = create_command(bid, 0, trid, missing_parts); m_cdag.add_dependency(cmd, ap_cmd, dependency_kind::true_dep, dependency_origin::dataflow); @@ -322,7 +324,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // Generate separate push command for each last writer command for now, // possibly even multiple for partially already-replicated data. // TODO: Can and/or should we consolidate? - auto* const push_cmd = create_command(bid, 0, nid, trid, grid_box_to_subrange(replicated_box)); + auto* const push_cmd = create_command(bid, 0, nid, trid, replicated_box.get_subrange()); assert(!utils::isa(m_cdag.get(wcs)) && "Attempting to push non-owned data?!"); m_cdag.add_dependency(push_cmd, m_cdag.get(wcs), dependency_kind::true_dep, dependency_origin::dataflow); generated_pushes.push_back(push_cmd); @@ -343,7 +345,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // generating anti-dependencies around this requirement. This might not be valid if (multivariate) reductions ever operate on regions. 
if(!generate_reduction) { generate_anti_dependencies(tsk.get_id(), bid, buffer_state.local_last_writer, req, cmd); } - per_buffer_local_writes[bid] = GridRegion<3>::merge(per_buffer_local_writes[bid], req); + per_buffer_local_writes[bid] = region_union(per_buffer_local_writes[bid], req); per_buffer_last_writer_update_list[bid].push_back({req, cmd->get_cid()}); } } @@ -351,8 +353,8 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(generate_reduction) { const auto& reduction = *buffer_state.pending_reduction; - const GridBox<3> box{GridPoint<3>{1, 1, 1}}; - const subrange<3> sr{{}, {1, 1, 1}}; + const box<3> box({0, 0, 0}, {1, 1, 1}); + const auto sr = box.get_subrange(); const auto local_last_writer = buffer_state.local_last_writer.get_region_values(box); assert(local_last_writer.size() == 1); @@ -365,7 +367,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) m_cdag.add_dependency(reduce_cmd, m_cdag.get(local_last_writer[0].second), dependency_kind::true_dep, dependency_origin::dataflow); } - auto* const ap_cmd = create_command(bid, reduction.rid, trid, subrange_to_grid_box(sr)); + auto* const ap_cmd = create_command(bid, reduction.rid, trid, sr); m_cdag.add_dependency(reduce_cmd, ap_cmd, dependency_kind::true_dep, dependency_origin::dataflow); generate_epoch_dependencies(ap_cmd); @@ -384,7 +386,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) if(notification_only) { generate_epoch_dependencies(push_cmd); } else { - m_command_buffer_reads[push_cmd->get_cid()][bid] = GridRegion<3>::merge(m_command_buffer_reads[push_cmd->get_cid()][bid], box); + m_command_buffer_reads[push_cmd->get_cid()][bid] = region_union(m_command_buffer_reads[push_cmd->get_cid()][bid], box); m_cdag.add_dependency(push_cmd, m_cdag.get(local_last_writer[0].second), dependency_kind::true_dep, dependency_origin::dataflow); } @@ -447,7 +449,7 @@ void 
distributed_graph_generator::generate_distributed_commands(const task& tsk) // These can happen in rare cases, when the node that pushes a buffer range also writes to that range within the same task. // We cannot do this while generating the push command, as we may not have the writing command recorded at that point. for(auto* push_cmd : generated_pushes) { - const auto last_writers = m_buffer_states.at(push_cmd->get_bid()).local_last_writer.get_region_values(subrange_to_grid_box(push_cmd->get_range())); + const auto last_writers = m_buffer_states.at(push_cmd->get_bid()).local_last_writer.get_region_values(region(push_cmd->get_range())); for(const auto& [box, wcs] : last_writers) { assert(!box.empty()); // If we want to push it it cannot be empty @@ -477,14 +479,16 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) // Determine which local data is fresh/stale based on task-level writes. auto requirements = get_buffer_requirements_for_mapped_access(tsk, subrange<3>(tsk.get_global_offset(), tsk.get_global_size()), tsk.get_global_size()); for(auto& [bid, reqs_by_mode] : requirements) { - GridRegion<3> global_writes; + std::vector> global_write_boxes; for(const auto mode : access::producer_modes) { if(reqs_by_mode.count(mode) == 0) continue; - global_writes = GridRegion<3>::merge(global_writes, reqs_by_mode.at(mode)); + const auto& by_mode = reqs_by_mode.at(mode); + global_write_boxes.insert(global_write_boxes.end(), by_mode.get_boxes().begin(), by_mode.get_boxes().end()); } + const region global_writes(std::move(global_write_boxes)); const auto& local_writes = per_buffer_local_writes[bid]; - assert(GridRegion<3>::difference(local_writes, global_writes).empty()); // Local writes have to be a subset of global writes - const auto remote_writes = GridRegion<3>::difference(global_writes, local_writes); + assert(region_difference(local_writes, global_writes).empty()); // Local writes have to be a subset of global writes + const auto 
remote_writes = region_difference(global_writes, local_writes); auto& buffer_state = m_buffer_states.at(bid); // TODO: We need a way of updating regions in place! E.g. apply_to_values(box, callback) @@ -501,7 +505,7 @@ void distributed_graph_generator::generate_distributed_commands(const task& tsk) } void distributed_graph_generator::generate_anti_dependencies( - task_id tid, buffer_id bid, const region_map& last_writers_map, const GridRegion<3>& write_req, abstract_command* write_cmd) { + task_id tid, buffer_id bid, const region_map& last_writers_map, const region<3>& write_req, abstract_command* write_cmd) { const auto last_writers = last_writers_map.get_region_values(write_req); for(const auto& [box, wcs] : last_writers) { auto* const last_writer_cmd = m_cdag.get(static_cast(wcs)); @@ -523,7 +527,7 @@ void distributed_graph_generator::generate_anti_dependencies( const auto& command_reads = command_reads_it->second; // The task might be a dependent because of another buffer if(const auto buffer_reads_it = command_reads.find(bid); buffer_reads_it != command_reads.end()) { - if(!GridRegion<3>::intersect(write_req, buffer_reads_it->second).empty()) { + if(!region_intersection(write_req, buffer_reads_it->second).empty()) { has_successors = true; m_cdag.add_dependency(write_cmd, cmd, dependency_kind::anti_dep, dependency_origin::dataflow); } diff --git a/src/grid.cc b/src/grid.cc new file mode 100644 index 000000000..6ec7167b8 --- /dev/null +++ b/src/grid.cc @@ -0,0 +1,531 @@ +#include "grid.h" + +namespace celerity::detail::grid_detail { + +// Regions have a storage dimensionality (the `Dims` template parameter of `class region`) and an effective dimensionality that is smaller iff all contained +// boxes are effectively the result of casting e.g. box<2> to box<3>, or the described region "accidentally" is a lower-dimensional slice of the full space. 
+// This property is detected at runtime through {box,region}::get_min_dimensions(), and all region-algorithm implementations are generic over both StorageDims
+// and EffectiveDims to optimize for the embedding of arbitrary-dimensional regions into region<3> as it commonly happens in the runtime.
+
+// 2-connectivity for 1d boxes, 4-connectivity for 2d boxes and 6-connectivity for 3d boxes: true iff both boxes are non-empty and, within the first EffectiveDims dimensions, overlap everywhere or touch in exactly one dimension while overlapping in all others.
+template <int EffectiveDims, int StorageDims>
+bool boxes_connected(const box<StorageDims>& box1, const box<StorageDims>& box2) {
+	static_assert(EffectiveDims <= StorageDims);
+
+	if(box1.empty() || box2.empty()) return false;
+
+	bool touching = false;
+	for(int d = 0; d < EffectiveDims; ++d) {
+		const auto min = std::max(box1.get_min()[d], box2.get_min()[d]); // scalar lower bound of the (non-normalized) intersection interval in dimension d
+		const auto max = std::min(box1.get_max()[d], box2.get_max()[d]); // scalar upper bound of the (non-normalized) intersection interval in dimension d
+		if(min > max) return false; // fully disconnected, even across corners
+		if(min == max) {
+			// when boxes are touching (but not intersecting) in more than one dimension, they can only be connected via corners
+			if(touching) return false;
+			touching = true;
+		}
+	}
+	return true;
+}
+
+// Like detail::box_intersection, but aware of effective dimensionality: only the first EffectiveDims dimensions are intersected, all remaining ones are set to [0, 1). Returns a default-constructed box as soon as one dimension has no overlap.
+template <int EffectiveDims, int StorageDims>
+box<StorageDims> box_intersection(const box<StorageDims>& box1, const box<StorageDims>& box2) {
+	static_assert(EffectiveDims <= StorageDims);
+
+	id<StorageDims> min;
+	id<StorageDims> max;
+	for(int d = 0; d < EffectiveDims; ++d) {
+		min[d] = std::max(box1.get_min()[d], box2.get_min()[d]);
+		max[d] = std::min(box1.get_max()[d], box2.get_max()[d]);
+		if(min[d] >= max[d]) return {};
+	}
+	for(int d = EffectiveDims; d < StorageDims; ++d) {
+		min[d] = 0;
+		max[d] = 1;
+	}
+	return make_box<StorageDims>(non_empty, min, max);
+}
+
+// Like box::covers, but aware of effective dimensionality: true iff the bounds of `bottom` lie within those of `top` in each of the first EffectiveDims dimensions.
+template <int EffectiveDims, int StorageDims>
+bool box_covers(const box<StorageDims>& top, const box<StorageDims>& bottom) {
+	static_assert(EffectiveDims <= StorageDims);
+	for(int d = 0; d < EffectiveDims; ++d) {
+		if(bottom.get_min()[d] < top.get_min()[d]) return false;
+		if(bottom.get_max()[d] > top.get_max()[d]) return false;
+	}
+	return true;
+}
+
+// O(N^2) remove any box A != B for
which box_covers(B, A) is true +template +BidirectionalIterator remove_pairwise_covered(BidirectionalIterator first, BidirectionalIterator last) { + for(auto top = first; top != last; ++top) { + top_replaced: + for(auto bottom = std::next(top); bottom != last;) { + if(box_covers(*top, *bottom)) { + *bottom = *--last; + } else if(box_covers(*bottom, *top)) { + *top = *bottom; + *bottom = *--last; + goto top_replaced; // NOLINT(cppcoreguidelines-avoid-goto) + } else { + ++bottom; + } + } + } + return last; +} + +// Partition a range of boxes into intervals described by a grid of dissection lines, and invoke a user function on each partition. +template +void for_each_dissection_interval(BidirectionalIterator first, BidirectionalIterator last, const std::vector>& cuts, Fn&& f, int dim = 0) { + using box_type = typename std::iterator_traits::value_type; + + assert(first != last); + + if(cuts.size() <= static_cast(dim)) { + // We are past the last dissected dimension, so the interval is just our entire input range + f(first, last); + return; + } + + // Since boxes can never cross a dissection line, we can partition the range into dissection intervals by sorting along one dimension + std::sort(first, last, [dim](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[dim] < rhs.get_min()[dim]; }); + + auto next_cut = cuts[dim].begin(); + while(first != last) { + // The current box `first` always belongs to our interval. 
Now find, in O(log N), the dissection line that marks the end of this interval + next_cut = std::upper_bound(next_cut, cuts[dim].end(), first->get_min()[dim]); + assert(next_cut != cuts[dim].end()); + + // Find, in O(log N), the end iterator of our interval by searching the first item that is "right" of the dissection line + const auto next = std::lower_bound(first, last, *next_cut, [dim](const box_type& lhs, const size_t cut) { return lhs.get_min()[dim] < cut; }); + + // Recurse into the found interval along the next (faster) dimension + for_each_dissection_interval(first, next, cuts, f, dim + 1); + + first = next; + } +} + +// Like remove_pairwise_covered(first, last), but at lower average complexity for a range of boxes that are dissected according to `cuts`. +template +BidirectionalIterator remove_pairwise_covered(BidirectionalIterator first, BidirectionalIterator last, const std::vector>& cuts) { + using box_type [[maybe_unused]] = typename std::iterator_traits::value_type; + + assert(cuts.size() <= EffectiveDims); + assert(std::all_of(cuts.begin(), cuts.end(), [](const std::vector& dim_cuts) { return std::is_sorted(dim_cuts.begin(), dim_cuts.end()); })); + + if(first == last || std::next(first) == last) return last; + + // We compact the range in-place after each removal by left-shifting each de-duplicated range + auto last_out = first; + + for_each_dissection_interval(first, last, cuts, [&](const BidirectionalIterator i_first, const BidirectionalIterator i_last) { + // Delegate the interval to the O(N^2) overload of remove_pairwise_covered + const auto last_retained = remove_pairwise_covered(i_first, i_last); + // for_each_dissection_interval will not touch [first, i_last) after this iteration + last_out = std::move(i_first, last_retained, last_out); + }); + + return last_out; +} + +// In a range of boxes that are identical in all dimensions except MergeDim, merge all connected boxes ("unconditional directional merge") +template +BidirectionalIterator 
merge_connected_intervals(BidirectionalIterator first, BidirectionalIterator last) { + using box_type = typename std::iterator_traits::value_type; + + if(first == last || std::next(first) == last) return last; // common-case shortcut: no merge is possible + + // Sort by interval starting point + std::sort(first, last, [](const box_type& lhs, const box_type& rhs) { return lhs.get_min()[MergeDim] < rhs.get_min()[MergeDim]; }); + + // The range is both read and written from left-to-right, avoiding repeated left-shifts for compaction + auto last_out = first; + + // Merge all connected boxes along MergeDim in O(N) by replacing each connected sequence with its bounding box + while(first != last) { + const auto merged_min = first->get_min(); + auto merged_max = first->get_max(); + for(++first; first != last && first->get_min()[MergeDim] <= merged_max[MergeDim]; ++first) { + merged_max[MergeDim] = std::max(merged_max[MergeDim], first->get_max()[MergeDim]); + } + *last_out++ = make_box(grid_detail::non_empty, merged_min, merged_max); + } + + return last_out; +} + +// In an arbitrary range of boxes, merge all boxes that are identical in all dimensions except MergeDim ("conditional directional merge"). 
+template +BidirectionalIterator merge_connected_boxes_along_dim(const BidirectionalIterator first, const BidirectionalIterator last) { + using box_type = typename std::iterator_traits::value_type; + static_assert(EffectiveDims <= box_type::dimensions); + static_assert(MergeDim < EffectiveDims); + + constexpr auto orthogonal_to_merge_dim = [](const box_type& lhs, const box_type& rhs) { + for(int d = 0; d < EffectiveDims; ++d) { + if(d == MergeDim) continue; + // arbitrary but consistent ordering along all orthogonal dimensions + if(lhs.get_min()[d] < rhs.get_min()[d]) return true; + if(lhs.get_min()[d] > rhs.get_min()[d]) return false; + if(lhs.get_max()[d] < rhs.get_max()[d]) return true; + if(lhs.get_max()[d] > rhs.get_max()[d]) return false; + } + return false; + }; + + if constexpr(EffectiveDims == 1) { + return merge_connected_intervals(first, last); + } else { + // partition [first, last) into sequences of boxes that are potentially mergeable wrt/ the dimensions orthogonal to MergeDim. + // This reduces complexity from O(n^3) to O(n log n) + O(m^3), where m is the longest mergeable sequence in that regard. + std::sort(first, last, orthogonal_to_merge_dim); + + // we want the result to be contiguous in [first, last_out), so in each iteration, we merge all boxes of a MergeDim-equal partition at their original + // position in the iterator range; and then shift the merged range back to fill any gap left by merge of a previous partition. 
+ auto last_out = first; + + for(auto first_equal = first; first_equal != last;) { + // O(n) std::find_if could be replaced by O(log n) std::partition_point, but we expect the number of "equal" elements to be small + const auto last_equal = std::find_if(std::next(first_equal), last, [&](const box_type& box) { + return orthogonal_to_merge_dim(*first_equal, box); // true if box is in a partition _after_ *first_equal + }); + const auto last_merged = merge_connected_intervals(first_equal, last_equal); + // shift the newly merged boxes to the left to close any gap opened by the merge of a previous partition + last_out = std::move(first_equal, last_merged, last_out); + first_equal = last_equal; + } + + return last_out; + } +} + +// explicit instantiations for tests (might otherwise be inlined) +template std::vector>::iterator merge_connected_boxes_along_dim<0, 1>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<0, 2>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<1, 2>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<0, 3>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<1, 3>(std::vector>::iterator first, std::vector>::iterator last); +template std::vector>::iterator merge_connected_boxes_along_dim<2, 3>(std::vector>::iterator first, std::vector>::iterator last); + +// For higher-dimensional regions, the order in which dimensions are merged is relevant for the shape of the resulting box set. We merge along the last +// ("fastest") dimension first to make sure the resulting boxes cover the largest possible extent of contiguous memory when they are applied to buffers. 
+template +BidirectionalIterator merge_connected_boxes_recurse(const BidirectionalIterator first, BidirectionalIterator last) { + static_assert(MergeDim >= 0 && MergeDim < EffectiveDims); + last = merge_connected_boxes_along_dim(first, last); + if constexpr(MergeDim > 0) { last = merge_connected_boxes_recurse(first, last); } + return last; +} + +// Merge all adjacent boxes that are connected and identical in all except a single dimension. +template +BidirectionalIterator merge_connected_boxes(const BidirectionalIterator first, BidirectionalIterator last) { + using box_type = typename std::iterator_traits::value_type; + static_assert(EffectiveDims <= box_type::dimensions); + if constexpr(EffectiveDims > 0) { last = merge_connected_boxes_recurse(first, last); } + return last; +} + +// Split a box into parts according to dissection lines in `cuts`, where `cuts` is indexed by component dimension. This function is not generic +// over EffectiveDims, rather, `cuts` will have 1 <= n <= StorageDims entries to indicate along how many dimensions the box should be dissected. 
+template +void dissect_box(const box& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim) { + assert(dim < static_cast(cuts.size())); + + const auto& dim_cuts = cuts[static_cast(dim)]; + assert(std::is_sorted(dim_cuts.begin(), dim_cuts.end())); + + // start of the first (current) dissected box + size_t start = in_box.get_min()[dim]; + // find the first cut that lies inside the box (dim_cuts is sorted) + auto cut_it = std::lower_bound(dim_cuts.begin(), dim_cuts.end(), /* not less or equal */ start + 1); + + for(;;) { + // the end of the current box is either the last cut that lies inside the box, or the end of in_box + size_t end; + if(cut_it != dim_cuts.end() && *cut_it < in_box.get_max()[dim]) { + end = *cut_it++; + } else { + end = in_box.get_max()[dim]; + } + if(end == start) break; + + // compute coordinates for the dissected box along `dim`, and recursively dissect it further along `dim + 1` + auto min = in_box.get_min(); + auto max = in_box.get_max(); + min[dim] = start; + max[dim] = end; + const auto small_box = make_box(grid_detail::non_empty, min, max); + if(dim + 1 < static_cast(cuts.size())) { + dissect_box(small_box, cuts, out_dissected, dim + 1); + } else { + out_dissected.push_back(small_box); + } + + start = end; + } +} + +// explicit instantiations for tests (might otherwise be inlined) +template void dissect_box(const box<2>& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); +template void dissect_box(const box<3>& in_box, const std::vector>& cuts, std::vector>& out_dissected, int dim); + +// Apply dissect_box to all boxes in a range, with a shortcut if no cuts are to be done. 
+template +void dissect_boxes(const InputIterator first, const InputIterator last, const std::vector>& cuts, + std::vector::value_type>& out_dissected) { + if(!cuts.empty()) { + for(auto it = first; it != last; ++it) { + dissect_box(*it, cuts, out_dissected, 0); + } + } else { + out_dissected.insert(out_dissected.end(), first, last); + } +} + +// Collect the sorted, unique list of box start- and end points along a single dimension. These can then be used in dissect_boxes. +template +std::vector collect_dissection_lines(const InputIterator first, const InputIterator last, int dim) { + std::vector cuts; + // allocating 2*N integers might seem wasteful, but this has negligible runtime in the profiler and is already algorithmically optimal at O(N log N) + cuts.reserve(std::distance(first, last) * 2); + for(auto it = first; it != last; ++it) { + cuts.push_back(it->get_min()[dim]); + cuts.push_back(it->get_max()[dim]); + } + std::sort(cuts.begin(), cuts.end()); + cuts.erase(std::unique(cuts.begin(), cuts.end()), cuts.end()); + assert(first == last || cuts.size() >= 2); + return cuts; +} + +template +void normalize_impl(std::vector>& boxes) { + static_assert(EffectiveDims <= StorageDims); + + // (hopefully) fast path: attempt to merge without dissecting first + boxes.erase(merge_connected_boxes(boxes.begin(), boxes.end()), boxes.end()); + assert(!boxes.empty()); + if(boxes.size() == 1) { return; } + + // 1. dissect boxes along the edges of all other boxes (except the last, "fastest" dim) to create the "maximally mergeable set" of small boxes for step 2 + std::vector> cuts; + if constexpr(EffectiveDims > 1) { + cuts.resize(EffectiveDims - 1); + for(int d = 0; d < EffectiveDims - 1; ++d) { + cuts[static_cast(d)] = collect_dissection_lines(boxes.begin(), boxes.end(), d); + } + + std::vector> disjoint_boxes; + dissect_boxes(boxes.begin(), boxes.end(), cuts, disjoint_boxes); + boxes = std::move(disjoint_boxes); + } + + // 2. 
remove all overlap by removing pairwise coverings + const auto first = boxes.begin(); + auto last = boxes.end(); + if constexpr(EffectiveDims != 1) { // in 1D regions, merge_adjacent will already remove all overlaps + last = remove_pairwise_covered(first, last, cuts); + } + + // 3. merge the overlap-free tiling of boxes where possible + last = merge_connected_boxes(first, last); + boxes.erase(last, boxes.end()); +} + +// For any set of boxes, find the unique box tiling that covers the same points and is subject to the following constraints: +// 1. the extent of every box is maximized along the last dimension, then along the second-to-last dimension, and so forth. +// 2. no two boxes within the tiling intersect (i.e. cover a common point). +// 3. the tiling contains no empty boxes. +// 4. the normalized sequence is sorted according to box_coordinate_order. +// There is exactly one sequence of boxes for any set of points that fulfills 1-4, meaning that an "==" comparison of normalized tilings would be equivalent +// to an equality comparison of the covered point sets. 
+template +void normalize(std::vector>& boxes) { + boxes.erase(std::remove_if(boxes.begin(), boxes.end(), std::mem_fn(&box::empty)), boxes.end()); + if(boxes.size() <= 1) return; + + const auto effective_dims = get_min_dimensions(boxes.begin(), boxes.end()); + assert(effective_dims <= Dims); + + // clang-format off + switch(effective_dims) { + case 0: normalize_impl<0>(boxes); break; + case 1: if constexpr(Dims >= 1) { normalize_impl<1>(boxes); break; } + case 2: if constexpr(Dims >= 2) { normalize_impl<2>(boxes); break; } + case 3: if constexpr(Dims >= 3) { normalize_impl<3>(boxes); break; } + default: abort(); // unreachable with the explicit instantiations in this file + } + // clang-format on + + std::sort(boxes.begin(), boxes.end(), box_coordinate_order()); +} + +// explicit instantiations for tests (might otherwise be inlined into region::region) +template void normalize(std::vector> &boxes); +template void normalize(std::vector> &boxes); +template void normalize(std::vector> &boxes); +template void normalize(std::vector> &boxes); + +template +region region_intersection_impl(const region& lhs, const region& rhs) { + static_assert(EffectiveDims <= StorageDims); + + // O(N * M). This can probably be improved for large inputs by dissecting either lhs or rhs by the lines of the other and then performing an interval + // search similar to how remove_pairwise_covered operates. + std::vector> intersection; + for(const auto& left : lhs.get_boxes()) { + for(const auto& right : rhs.get_boxes()) { + if(const auto box = grid_detail::box_intersection(left, right); !box.empty()) { intersection.push_back(box); } + } + } + + // No dissection step is necessary as the intersection of two normalized tilings is already "maximally mergeable". 
+ const auto first = intersection.begin(); + auto last = intersection.end(); + last = grid_detail::merge_connected_boxes(first, last); + + // intersected_boxes retains the sorting from lhs, but for Dims > 1, the intersection can shift min-points such that the box_coordinate_order reverses. + if constexpr(EffectiveDims > 1) { + std::sort(first, last, box_coordinate_order()); + } else { + assert(std::is_sorted(first, last, box_coordinate_order())); + } + + intersection.erase(last, intersection.end()); + return grid_detail::make_region(grid_detail::normalized, std::move(intersection)); +} + +// Complete the region_difference operation with an already dissected left-hand side and knowledge of effective dimensionality. +template +void apply_region_difference(std::vector>& dissected_left, const region& rhs) { + static_assert(EffectiveDims <= StorageDims); + + // O(N * M) remove all dissected boxes from lhs that are fully covered by any box in rhs + const auto first_left = dissected_left.begin(); + auto last_left = dissected_left.end(); + for(const auto& right : rhs.get_boxes()) { + for(auto left_it = first_left; left_it != last_left;) { + if(grid_detail::box_covers(right, *left_it)) { + *left_it = *--last_left; + } else { + ++left_it; + } + } + } + + // merge the now non-overlapping boxes + last_left = grid_detail::merge_connected_boxes(first_left, last_left); + dissected_left.erase(last_left, dissected_left.end()); +} + +} // namespace celerity::detail::grid_detail + +namespace celerity::detail { + +template +region::region(const box& single_box) : region(std::vector{single_box}) {} // still need to normalize in case single_box is empty + +template +region::region(const subrange& single_sr) : region(box(single_sr)) {} + +template +region::region(std::vector&& boxes) : region(grid_detail::normalized, (/* in-place */ grid_detail::normalize(boxes), /* then */ std::move(boxes))) {} + +template +region::region(grid_detail::normalized_t /* tag */, std::vector&& boxes) : 
m_boxes(std::move(boxes)) {} + +template class region<0>; +template class region<1>; +template class region<2>; +template class region<3>; + +template +region region_union(const region& lhs, const region& rhs) { + // shortcut-evaluate trivial cases + if(lhs.empty()) return rhs; + if(rhs.empty()) return lhs; + + std::vector> box_union; + box_union.reserve(lhs.get_boxes().size() + rhs.get_boxes().size()); + box_union.insert(box_union.end(), lhs.get_boxes().begin(), lhs.get_boxes().end()); + box_union.insert(box_union.end(), rhs.get_boxes().begin(), rhs.get_boxes().end()); + return region(std::move(box_union)); +} + +template region<0> region_union(const region<0>& lhs, const region<0>& rhs); +template region<1> region_union(const region<1>& lhs, const region<1>& rhs); +template region<2> region_union(const region<2>& lhs, const region<2>& rhs); +template region<3> region_union(const region<3>& lhs, const region<3>& rhs); + +template +region region_intersection(const region& lhs, const region& rhs) { + // shortcut-evaluate trivial cases + if(lhs.empty() || rhs.empty()) return {}; + + const auto effective_dims = std::max(lhs.get_min_dimensions(), rhs.get_min_dimensions()); + assert(effective_dims <= Dims); + + // clang-format off + switch(effective_dims) { + case 0: return grid_detail::region_intersection_impl<0>(lhs, rhs); + case 1: if constexpr(Dims >= 1) { return grid_detail::region_intersection_impl<1>(lhs, rhs); } + case 2: if constexpr(Dims >= 2) { return grid_detail::region_intersection_impl<2>(lhs, rhs); } + case 3: if constexpr(Dims >= 3) { return grid_detail::region_intersection_impl<3>(lhs, rhs); } + } + // clang-format on + abort(); // unreachable +} + +template region<0> region_intersection(const region<0>& lhs, const region<0>& rhs); +template region<1> region_intersection(const region<1>& lhs, const region<1>& rhs); +template region<2> region_intersection(const region<2>& lhs, const region<2>& rhs); +template region<3> region_intersection(const 
region<3>& lhs, const region<3>& rhs); + +template +region region_difference(const region& lhs, const region& rhs) { + // shortcut-evaluate trivial cases + if(lhs.empty()) return {}; + if(rhs.empty()) return lhs; + + // the resulting effective_dims can never be greater than the lhs dimension, but the difference operator must still operate on all available dimensions + // to correctly identify overlapping boxes + const auto effective_dims = std::max(lhs.get_min_dimensions(), rhs.get_min_dimensions()); + assert(effective_dims <= Dims); + + // 1. collect dissection lines (in *all* dimensions) from rhs + std::vector> cuts(effective_dims); + for(int d = 0; d < effective_dims; ++d) { + cuts[static_cast(d)] = grid_detail::collect_dissection_lines(rhs.get_boxes().begin(), rhs.get_boxes().end(), d); + } + + // 2. dissect lhs according to the lines of rhs, so that any overlap between lhs and rhs is turned into an lhs box fully covered by an rhs box + std::vector> dissected_left; + grid_detail::dissect_boxes(lhs.get_boxes().begin(), lhs.get_boxes().end(), cuts, dissected_left); + + // clang-format off + switch(effective_dims) { + case 0: grid_detail::apply_region_difference<0>(dissected_left, rhs); break; + case 1: if constexpr(Dims >= 1) { grid_detail::apply_region_difference<1>(dissected_left, rhs); break; } + case 2: if constexpr(Dims >= 2) { grid_detail::apply_region_difference<2>(dissected_left, rhs); break; } + case 3: if constexpr(Dims >= 3) { grid_detail::apply_region_difference<3>(dissected_left, rhs); break; } + default: abort(); // unreachable + } + // clang-format on + + std::sort(dissected_left.begin(), dissected_left.end(), box_coordinate_order()); + + return grid_detail::make_region(grid_detail::normalized, std::move(dissected_left)); +} + +template region<0> region_difference(const region<0>& lhs, const region<0>& rhs); +template region<1> region_difference(const region<1>& lhs, const region<1>& rhs); +template region<2> region_difference(const region<2>& lhs, 
const region<2>& rhs); +template region<3> region_difference(const region<3>& lhs, const region<3>& rhs); + +} // namespace celerity::detail \ No newline at end of file diff --git a/src/print_graph.cc b/src/print_graph.cc index 563450a08..4bb49aeba 100644 --- a/src/print_graph.cc +++ b/src/print_graph.cc @@ -124,7 +124,7 @@ std::string get_command_label(const node_id local_nid, const command_record& cmd cmd.transfer_id.value(), buffer_label, cmd.await_region.value()); } break; case command_type::reduction: { - fmt::format_to(std::back_inserter(label), "reduction R{}
{} {}", cmd.reduction_id.value(), buffer_label, GridRegion<3>{{1, 1, 1}}); + fmt::format_to(std::back_inserter(label), "reduction R{}
{} {}", cmd.reduction_id.value(), buffer_label, box<3>{{0, 0, 0}, {1, 1, 1}}); } break; case command_type::horizon: { label += "horizon"; diff --git a/src/print_utils.cc b/src/print_utils.cc deleted file mode 100644 index fc7c9cd51..000000000 --- a/src/print_utils.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include "print_utils.h" - -#include - -#include - -namespace celerity { -namespace detail { - - std::ostream& print_chunk3(std::ostream& os, chunk<3> chnk3) { - auto start = chnk3.offset; - auto end = chnk3.offset + chnk3.range; - auto size = chnk3.global_size; - return os << fmt::format("[{},{},{}] - [{},{},{}] : {{{},{},{}}}", start[0], start[1], start[2], end[0], end[1], end[2], size[0], size[1], size[2]); - } - - std::ostream& print_subrange3(std::ostream& os, subrange<3> subr3) { - auto start = subr3.offset; - auto end = subr3.offset + subr3.range; - return os << fmt::format("[{},{},{}] - [{},{},{}]", start[0], start[1], start[2], end[0], end[1], end[2]); - } - -} // namespace detail -} // namespace celerity diff --git a/src/task.cc b/src/task.cc index b552879b1..c8cf40480 100644 --- a/src/task.cc +++ b/src/task.cc @@ -33,17 +33,17 @@ namespace detail { return subrange<3>{}; } - GridRegion<3> buffer_access_map::get_mode_requirements( + region<3> buffer_access_map::get_mode_requirements( const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { - GridRegion<3> result; + std::vector> boxes; for(size_t i = 0; i < m_accesses.size(); ++i) { if(m_accesses[i].first != bid || m_accesses[i].second->get_access_mode() != mode) continue; - result = GridRegion<3>::merge(result, get_requirements_for_nth_access(i, kernel_dims, sr, global_size)); + boxes.push_back(get_requirements_for_nth_access(i, kernel_dims, sr, global_size)); } - return result; + return region(std::move(boxes)); } - GridBox<3> buffer_access_map::get_requirements_for_nth_access( + box<3> buffer_access_map::get_requirements_for_nth_access( 
const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const { const auto& [_, rm] = m_accesses[n]; @@ -56,7 +56,7 @@ namespace detail { case 3: req = apply_range_mapper<3>(rm.get(), chunk_cast<3>(chnk)); break; default: assert(!"Unreachable"); } - return subrange_to_grid_box(req); + return req; } void side_effect_map::add_side_effect(const host_object_id hoid, const experimental::side_effect_order order) { diff --git a/src/task_manager.cc b/src/task_manager.cc index 3f8ddce6a..77ec4213f 100644 --- a/src/task_manager.cc +++ b/src/task_manager.cc @@ -17,7 +17,7 @@ namespace detail { void task_manager::add_buffer(buffer_id bid, const int dims, const range<3>& range, bool host_initialized) { m_buffers_last_writers.emplace(std::piecewise_construct, std::tuple{bid}, std::tuple{range, dims}); - if(host_initialized) { m_buffers_last_writers.at(bid).update_region(subrange_to_grid_box(subrange<3>({}, range)), m_epoch_for_new_tasks); } + if(host_initialized) { m_buffers_last_writers.at(bid).update_region(subrange<3>({}, range), m_epoch_for_new_tasks); } } const task* task_manager::find_task(task_id tid) const { return m_task_buffer.find_task(tid); } @@ -53,14 +53,15 @@ namespace detail { void task_manager::await_epoch(task_id epoch) { m_latest_epoch_reached.await(epoch); } - GridRegion<3> get_requirements(const task& tsk, buffer_id bid, const std::vector modes) { + region<3> get_requirements(const task& tsk, buffer_id bid, const std::vector &modes) { const auto& access_map = tsk.get_buffer_access_map(); const subrange<3> full_range{tsk.get_global_offset(), tsk.get_global_size()}; - GridRegion<3> result; + std::vector> boxes; for(auto m : modes) { - result = GridRegion<3>::merge(result, access_map.get_mode_requirements(bid, m, tsk.get_dimensions(), full_range, tsk.get_global_size())); + const auto req = access_map.get_mode_requirements(bid, m, tsk.get_dimensions(), full_range, tsk.get_global_size()); + boxes.insert(boxes.end(), 
req.get_boxes().begin(), req.get_boxes().end()); } - return result; + return region(std::move(boxes)); } void task_manager::compute_dependencies(task& tsk) { @@ -92,7 +93,7 @@ namespace detail { // Determine reader dependencies if(std::any_of(modes.cbegin(), modes.cend(), detail::access::mode_traits::is_consumer) || (reduction.has_value() && reduction->init_from_buffer)) { auto read_requirements = get_requirements(tsk, bid, {detail::access::consumer_modes.cbegin(), detail::access::consumer_modes.cend()}); - if(reduction.has_value()) { read_requirements = GridRegion<3>::merge(read_requirements, GridRegion<3>{{1, 1, 1}}); } + if(reduction.has_value()) { read_requirements = region_union(read_requirements, box<3>({0, 0, 0}, {1, 1, 1})); } const auto last_writers = m_buffers_last_writers.at(bid).get_region_values(read_requirements); for(auto& p : last_writers) { @@ -107,7 +108,7 @@ namespace detail { // Update last writers and determine anti-dependencies if(std::any_of(modes.cbegin(), modes.cend(), detail::access::mode_traits::is_producer) || reduction.has_value()) { auto write_requirements = get_requirements(tsk, bid, {detail::access::producer_modes.cbegin(), detail::access::producer_modes.cend()}); - if(reduction.has_value()) { write_requirements = GridRegion<3>::merge(write_requirements, GridRegion<3>{{1, 1, 1}}); } + if(reduction.has_value()) { write_requirements = region_union(write_requirements, box<3>({0, 0, 0}, {1, 1, 1})); } if(write_requirements.empty()) continue; const auto last_writers = m_buffers_last_writers.at(bid).get_region_values(write_requirements); @@ -128,7 +129,7 @@ namespace detail { const auto dependent_read_requirements = get_requirements(*dependent.node, bid, {detail::access::consumer_modes.cbegin(), detail::access::consumer_modes.cend()}); // Only add an anti-dependency if we are really writing over the region read by this task - if(!GridRegion<3>::intersect(write_requirements, dependent_read_requirements).empty()) { + 
if(!region_intersection(write_requirements, dependent_read_requirements).empty()) { add_dependency(tsk, *dependent.node, dependency_kind::anti_dep, dependency_origin::dataflow); has_anti_dependents = true; } diff --git a/src/worker_job.cc b/src/worker_job.cc index 9ef1abcd2..046932b63 100644 --- a/src/worker_job.cc +++ b/src/worker_job.cc @@ -158,7 +158,7 @@ namespace detail { access_infos.reserve(access_map.get_num_accesses()); for(size_t i = 0; i < access_map.get_num_accesses(); ++i) { const auto [bid, mode] = access_map.get_nth_access(i); - const auto sr = grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + const auto sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto info = m_buffer_mngr.access_host_buffer(bid, mode, sr); access_infos.push_back(closure_hydrator::accessor_info{info.ptr, info.backing_buffer_range, info.backing_buffer_offset, sr}); } @@ -212,7 +212,7 @@ namespace detail { for(size_t i = 0; i < access_map.get_num_accesses(); ++i) { const auto [bid, mode] = access_map.get_nth_access(i); - const auto sr = grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + const auto sr = access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); try { const auto info = m_buffer_mngr.access_device_buffer(bid, mode, sr); @@ -262,7 +262,7 @@ namespace detail { if(oob_max != id<3>{1, 1, 1}) { const auto& access_map = tsk->get_buffer_access_map(); const auto acc_sr = - grid_box_to_subrange(access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size())); + access_map.get_requirements_for_nth_access(i, tsk->get_dimensions(), data.sr, tsk->get_global_size()).get_subrange(); const auto oob_sr = subrange<3>(oob_min, range_cast<3>(oob_max - oob_min)); 
CELERITY_ERROR("Out-of-bounds access in kernel '{}' detected: Accessor {} for buffer {} attempted to access indices between {} which are " "outside of mapped subrange {}", diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0855b1635..46eaedbab 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_TARGETS graph_gen_reduction_tests graph_gen_transfer_tests graph_compaction_tests + grid_tests intrusive_graph_tests print_graph_tests region_map_tests @@ -45,8 +46,8 @@ set(TEST_TARGETS device_selection_tests ) -add_library(test_main test_main.cc) -set_test_target_parameters(test_main test_main.cc) +add_library(test_main test_main.cc grid_test_utils.cc) +set_test_target_parameters(test_main test_main.cc grid_test_utils.cc) set(TEST_OBJ_LIST "") foreach(TEST_TARGET ${TEST_TARGETS}) @@ -74,9 +75,9 @@ target_link_libraries(all_tests PRIVATE test_main) set_test_target_parameters(all_tests "") # Unit benchmark executable -add_executable(benchmarks benchmarks.cc benchmark_reporters.cc) +add_executable(benchmarks dag_benchmarks.cc grid_benchmarks.cc benchmark_reporters.cc) target_link_libraries(benchmarks PRIVATE test_main) -set_test_target_parameters(benchmarks benchmarks.cc) +set_test_target_parameters(benchmarks dag_benchmarks.cc grid_benchmarks.cc) add_subdirectory(system) if(CELERITY_DETAIL_INTEGRATION_TESTING) @@ -95,8 +96,11 @@ find_library(CAIRO_LIBRARIES ) if(CAIRO_INCLUDE_DIRS AND CAIRO_LIBRARIES) - target_compile_definitions(region_map_tests_OBJ PRIVATE CELERITY_DETAIL_HAVE_CAIRO=1) - target_include_directories(region_map_tests_OBJ PRIVATE ${CAIRO_INCLUDE_DIRS}) - target_link_libraries(region_map_tests PRIVATE ${CAIRO_LIBRARIES}) + message(STATUS "Building tests with cairo support: ${CAIRO_LIBRARIES}") + foreach(TEST_TARGET region_map_tests;region_map_tests_OBJ;test_main) + target_compile_definitions(${TEST_TARGET} PRIVATE CELERITY_DETAIL_HAVE_CAIRO=1) + target_include_directories(${TEST_TARGET} PRIVATE ${CAIRO_INCLUDE_DIRS}) 
+ target_link_libraries(${TEST_TARGET} PRIVATE ${CAIRO_LIBRARIES}) + endforeach() target_link_libraries(all_tests PRIVATE ${CAIRO_LIBRARIES}) -endif() \ No newline at end of file +endif() diff --git a/test/accessor_tests.cc b/test/accessor_tests.cc index d3c8d21d7..e8cca8fe2 100644 --- a/test/accessor_tests.cc +++ b/test/accessor_tests.cc @@ -162,7 +162,7 @@ namespace detail { // #if __SYCL_DEVICE_ONLY__ did get rid of the segfault, but caused the test to fail with a heap corruption at runtime. Instead, replacing id // with size_t seems to resolve the problem. - const auto range = range_cast(celerity::range<3>(2, 3, 4)); + const auto range = test_utils::truncate_range({2, 3, 4}); auto& bm = accessor_fixture::get_buffer_manager(); auto bid = bm.template register_buffer(range_cast<3>(range)); @@ -170,15 +170,15 @@ namespace detail { auto sr = subrange<3>({}, range_cast<3>(range)); // this kernel initializes the buffer what will be read after. - auto acc_write = accessor_fixture::template get_device_accessor(bid, range_cast(range), {}); + auto acc_write = accessor_fixture::template get_device_accessor(bid, range, {}); test_utils::run_parallel_for>(accessor_fixture::get_device_queue().get_sycl_queue(), - range_cast(range), {}, [=](celerity::item item) { acc_write[item] = item.get_linear_id(); }); + range, {}, [=](celerity::item item) { acc_write[item] = item.get_linear_id(); }); SECTION("for device buffers") { - auto acc_read = accessor_fixture::template get_device_accessor(bid, range_cast(range), {}); - auto acc = accessor_fixture::template get_device_accessor(bid, range_cast(range), {}); + auto acc_read = accessor_fixture::template get_device_accessor(bid, range, {}); + auto acc = accessor_fixture::template get_device_accessor(bid, range, {}); test_utils::run_parallel_for>( - accessor_fixture::get_device_queue().get_sycl_queue(), range_cast(range), {}, [=](celerity::item item) { + accessor_fixture::get_device_queue().get_sycl_queue(), range, {}, [=](celerity::item 
item) { size_t i = item[0]; size_t j = item[1]; if constexpr(Dims == 2) { @@ -191,8 +191,8 @@ namespace detail { } SECTION("for host buffers") { - auto acc_read = accessor_fixture::template get_host_accessor(bid, range_cast(range), {}); - auto acc = accessor_fixture::template get_host_accessor(bid, range_cast(range), {}); + auto acc_read = accessor_fixture::template get_host_accessor(bid, range, {}); + auto acc = accessor_fixture::template get_host_accessor(bid, range, {}); for(size_t i = 0; i < range[0]; i++) { for(size_t j = 0; j < range[1]; j++) { for(size_t k = 0; k < (Dims == 2 ? 1 : range[2]); k++) { @@ -207,8 +207,8 @@ namespace detail { } typename accessor_fixture::access_target tgt = accessor_fixture::access_target::host; - bool acc_check = accessor_fixture::template buffer_reduce>(bid, tgt, range_cast(range), - {}, true, [range = range_cast(range)](id idx, bool current, size_t value) { return current && value == get_linear_index(range, idx); }); + bool acc_check = accessor_fixture::template buffer_reduce>(bid, tgt, range, + {}, true, [range = range](id idx, bool current, size_t value) { return current && value == get_linear_index(range, idx); }); REQUIRE(acc_check); } @@ -257,7 +257,7 @@ namespace detail { buffer verify_buf{&verified, 1}; q.submit([&](handler& cgh) { // access with offset == buffer range just to mess with things - const auto offset = id_cast<1>(test_buf.get_range()); + const auto offset = id(test_buf.get_range()); const auto test_acc = test_buf.get_access(cgh, [=](chunk<1>) { return subrange<1>{offset, 0}; }); const auto verify_acc = verify_buf.get_access(cgh, one_to_one{}); cgh.parallel_for>(range<1>{1}, [=](item<1>) { @@ -654,10 +654,10 @@ namespace detail { #if !CELERITY_ACCESSOR_BOUNDARY_CHECK SKIP("CELERITY_ACCESSOR_BOUNDARY_CHECK=0"); #endif - buffer buff(range_cast(range<3>{10, 20, 30})); - const auto accessible_sr = subrange_cast(subrange<3>{{5, 10, 15}, {1, 2, 3}}); - const auto oob_idx_lo = id_cast(id<3>{1, 2, 3}); - const auto 
oob_idx_hi = id_cast(id<3>{7, 13, 25}); + buffer buff(test_utils::truncate_range({10, 20, 30})); + const auto accessible_sr = test_utils::truncate_subrange({{5, 10, 15}, {1, 2, 3}}); + const auto oob_idx_lo = test_utils::truncate_id({1, 2, 3}); + const auto oob_idx_hi = test_utils::truncate_id({7, 13, 25}); // we need to be careful about the orderign of the construction and destruction // of the Celerity queue and the log capturing utility here diff --git a/test/backend_tests.cc b/test/backend_tests.cc index d6d899410..a63509614 100644 --- a/test/backend_tests.cc +++ b/test/backend_tests.cc @@ -44,11 +44,11 @@ void verify_copied_linear_ids(const size_t* host_buf, const range& source_ template struct copy_parameters { - range source_range = range_cast(range<3>(5, 7, 11)); - range target_range = range_cast(range<3>(13, 17, 19)); - range copy_range = range_cast(range<3>(2, 4, 8)); - id source_offset = id_cast(id<3>(2, 2, 2)); - id target_offset = id_cast(id<3>(3, 5, 7)); + range source_range = test_utils::truncate_range({5, 7, 11}); + range target_range = test_utils::truncate_range({13, 17, 19}); + range copy_range = test_utils::truncate_range({2, 4, 8}); + id source_offset = test_utils::truncate_id({2, 2, 2}); + id target_offset = test_utils::truncate_id({3, 5, 7}); }; template diff --git a/test/benchmarks.cc b/test/dag_benchmarks.cc similarity index 100% rename from test/benchmarks.cc rename to test/dag_benchmarks.cc diff --git a/test/graph_gen_granularity_tests.cc b/test/graph_gen_granularity_tests.cc index 4998c255e..567aa5221 100644 --- a/test/graph_gen_granularity_tests.cc +++ b/test/graph_gen_granularity_tests.cc @@ -71,13 +71,13 @@ TEMPLATE_TEST_CASE_SIG("distributed_graph_generator does not create empty chunks task_id tid = -1; SECTION("for simple tasks") { - task_range = range_cast(range<3>(2, 2, 2)); + task_range = truncate_range({2, 2, 2}); tid = dctx.device_compute>(task_range).submit(); } SECTION("for nd-range tasks") { - task_range = 
range_cast(range<3>(16, 2, 2)); - const auto local_range = range_cast(range<3>(8, 1, 1)); + task_range = truncate_range({16, 2, 2}); + const auto local_range = truncate_range({8, 1, 1}); tid = dctx.device_compute>(nd_range(task_range, local_range)).submit(); } diff --git a/test/graph_generation_tests.cc b/test/graph_generation_tests.cc index d42ec107d..469c07b07 100644 --- a/test/graph_generation_tests.cc +++ b/test/graph_generation_tests.cc @@ -66,7 +66,7 @@ TEST_CASE("isa<> RTTI helper correctly handles command hierarchies", "[rtti][com REQUIRE(utils::isa(hec)); auto* const pc = cdag.create(0, 0, 0, 0, subrange<3>{}); REQUIRE(utils::isa(pc)); - auto* const apc = cdag.create(0, 0, 0, GridRegion<3>{}); + auto* const apc = cdag.create(0, 0, 0, region<3>{}); REQUIRE(utils::isa(apc)); } diff --git a/test/grid_benchmarks.cc b/test/grid_benchmarks.cc new file mode 100644 index 000000000..80cb93d4e --- /dev/null +++ b/test/grid_benchmarks.cc @@ -0,0 +1,194 @@ +#include "grid_test_utils.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace celerity; +using namespace celerity::detail; + +template +std::vector> create_random_boxes(const size_t grid_size, const size_t max_box_size, const size_t num_boxes, const uint32_t seed) { + std::minstd_rand rng(seed); + std::uniform_int_distribution offset_dist(0, grid_size - 1); + std::binomial_distribution range_dist(max_box_size - 1, 0.5); + std::vector> boxes; + while(boxes.size() < num_boxes) { + subrange sr; + bool inbounds = true; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = offset_dist(rng); + sr.range[d] = 1 + range_dist(rng); + inbounds &= sr.offset[d] + sr.range[d] <= grid_size; + } + if(inbounds) { boxes.emplace_back(sr); } + } + return boxes; +} + +TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 
200}, + })); + + const auto input_2d = create_random_boxes<2>(grid_size, max_box_size, num_boxes, 42); + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(input_2d)); }; + + const auto input_3d = grid_detail::boxes_cast<3>(input_2d); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + + const auto normalized_2d = grid_detail::normalize(std::vector(input_2d)); + const auto normalized_3d = grid_detail::normalize(std::vector(input_3d)); + CHECK(normalized_3d == grid_detail::boxes_cast<3>(normalized_2d)); + + test_utils::render_boxes(input_2d, fmt::format("{}-input", label)); + test_utils::render_boxes(normalized_2d, fmt::format("{}-normalized", label)); +} + +TEST_CASE("normalizing randomized box sets - 3d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 200}, + })); + + const auto input_3d = create_random_boxes<3>(grid_size, max_box_size, num_boxes, 42); + BENCHMARK(fmt::format("{} - native", label)) { return grid_detail::normalize(std::vector(input_3d)); }; + test_utils::black_hole(grid_detail::normalize(std::vector(input_3d))); // to attach a profiler +} + +template +std::vector> create_box_tiling(const size_t n_per_side) { + const size_t length = 5; + size_t n_linear = 1; + for(int d = 0; d < Dims; ++d) { + n_linear *= n_per_side; + } + std::vector> boxes(n_linear); + for(size_t i = 0; i < n_linear; ++i) { + subrange sr; + auto dist_i = i; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = length * (dist_i % n_per_side); + sr.range[d] = length; + dist_i /= n_per_side; + } + boxes[i] = sr; + } + return boxes; +} + +TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchmark][grid]", ((int Dims), Dims), 1, 2, 3) { + const auto [label, n] = GENERATE(values>({ + {"small", 4}, + {"medium", 50}, + {"large", 1000}, + })); + + 
const size_t n_per_side = llrint(pow(n, 1.0 / Dims)); + + const auto boxes_nd = create_box_tiling(n_per_side); + const auto normalized_nd = grid_detail::normalize(std::vector(boxes_nd)); + CHECK(normalized_nd.size() == 1); + + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_nd)); }; + + if constexpr(Dims < 3) { + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_nd); + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + } + + if constexpr(Dims == 2) { + test_utils::render_boxes(boxes_nd, fmt::format("{}-input", label)); + test_utils::render_boxes(normalized_nd, fmt::format("{}-normalized", label)); + } +} + +TEST_CASE("performing set operations between randomized regions - 2d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_2d{ + region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<2>(grid_size, max_box_size, num_boxes, 37))}; + const std::vector inputs_3d{region_cast<3>(inputs_2d[0]), region_cast<3>(inputs_2d[1])}; + + test_utils::render_boxes(inputs_2d[0].get_boxes(), fmt::format("{}-input-a", label)); + test_utils::render_boxes(inputs_2d[1].get_boxes(), fmt::format("{}-input-b", label)); + + BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("union, {}, embedded in 3d", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("intersection, {}, embedded in 3d", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return 
region_difference(inputs_2d[0], inputs_2d[1]); }; + BENCHMARK(fmt::format("difference, {}, embedded in 3d", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; + + const auto union_2d = region_union(inputs_2d[0], inputs_2d[1]); + const auto union_3d = region_union(inputs_3d[0], inputs_3d[1]); + const auto intersection_2d = region_intersection(inputs_2d[0], inputs_2d[1]); + const auto intersection_3d = region_intersection(inputs_3d[0], inputs_3d[1]); + const auto difference_2d = region_difference(inputs_2d[0], inputs_2d[1]); + const auto difference_3d = region_difference(inputs_3d[0], inputs_3d[1]); + + CHECK(union_3d == region_cast<3>(union_2d)); + CHECK(intersection_3d == region_cast<3>(intersection_2d)); + CHECK(difference_3d == region_cast<3>(difference_2d)); + + test_utils::render_boxes(union_2d.get_boxes(), fmt::format("union-{}", label)); + test_utils::render_boxes(intersection_2d.get_boxes(), fmt::format("intersection-{}", label)); + test_utils::render_boxes(difference_2d.get_boxes(), fmt::format("difference-{}", label)); +} + +TEST_CASE("performing set operations between randomized regions - 3d", "[benchmark][grid]") { + const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values>({ + {"small", 10, 5, 4}, + {"medium", 50, 1, 50}, + {"large", 200, 20, 100}, + })); + + const std::vector inputs_3d{ + region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 13)), region(create_random_boxes<3>(grid_size, max_box_size, num_boxes, 37))}; + + BENCHMARK(fmt::format("union, {}, native", label)) { return region_union(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("intersection, {}, native", label)) { return region_intersection(inputs_3d[0], inputs_3d[1]); }; + BENCHMARK(fmt::format("difference, {}, native", label)) { return region_difference(inputs_3d[0], inputs_3d[1]); }; + + // to attach a profiler + test_utils::black_hole(region_union(inputs_3d[0], inputs_3d[1])); + 
test_utils::black_hole(region_intersection(inputs_3d[0], inputs_3d[1])); + test_utils::black_hole(region_difference(inputs_3d[0], inputs_3d[1])); +} + +std::vector> create_interlocking_boxes(const size_t num_boxes_per_side) { + std::vector> boxes; + for(size_t i = 0; i < num_boxes_per_side; ++i) { + boxes.emplace_back(id<2>(i, i), id<2>(i + 1, num_boxes_per_side)); + boxes.emplace_back(id<2>(i + 1, i), id<2>(num_boxes_per_side, i + 1)); + } + return boxes; +} + +TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[benchmark][grid]") { + const auto [label, n] = GENERATE(values>({ + {"small", 10}, + {"large", 200}, + })); + + const auto boxes_2d = create_interlocking_boxes(n); + const auto boxes_3d = grid_detail::boxes_cast<3>(boxes_2d); + + BENCHMARK(fmt::format("{}, native", label)) { return grid_detail::normalize(std::vector(boxes_2d)); }; + BENCHMARK(fmt::format("{}, embedded in 3d", label)) { return grid_detail::normalize(std::vector(boxes_3d)); }; + + test_utils::render_boxes(boxes_2d, fmt::format("{}-input", label)); +} diff --git a/test/grid_test_utils.cc b/test/grid_test_utils.cc new file mode 100644 index 000000000..3d904307c --- /dev/null +++ b/test/grid_test_utils.cc @@ -0,0 +1,112 @@ +#include "grid_test_utils.h" + +#if CELERITY_DETAIL_HAVE_CAIRO +#include +#endif + +using namespace celerity; +using namespace celerity::detail; + +// forward declarations for functions not exposed in grid.h +namespace celerity::test_utils { + +// input: h as an angle in [0,360] and s,l in [0,1] - output: r,g,b in [0,1] +std::array hsl2rgb(const float h, const float s, const float l) { + constexpr auto hue2rgb = [](const float p, const float q, float t) { + if(t < 0) t += 1; + if(t > 1) t -= 1; + if(t < 1.f / 6) return p + (q - p) * 6 * t; + if(t < 1.f / 2) return q; + if(t < 2.f / 3) return p + (q - p) * (2.f / 3 - t) * 6; + return p; + }; + + if(s == 0) return {l, l, l}; // achromatic + + const auto q = l < 0.5 ? 
l * (1 + s) : l + s - l * s; + const auto p = 2 * l - q; + const auto r = hue2rgb(p, q, h + 1.f / 3); + const auto g = hue2rgb(p, q, h); + const auto b = hue2rgb(p, q, h - 1.f / 3); + return {r, g, b}; +} + +void render_boxes(const std::vector>& boxes, const std::string_view suffix) { +#if CELERITY_DETAIL_HAVE_CAIRO + const auto env = std::getenv("CELERITY_RENDER_REGIONS"); + if(env == nullptr || env[0] == 0) return; + + constexpr int ruler_width = 30; + constexpr int ruler_space = 4; + constexpr int text_margin = 2; + constexpr int border_start = ruler_width + ruler_space; + constexpr int cell_size = 20; + constexpr int border_end = 30; + constexpr int inset = 1; + + const auto bounds = bounding_box(boxes); + const auto canvas_width = border_start + static_cast(bounds.get_max()[1]) * cell_size + border_end; + const auto canvas_height = border_start + static_cast(bounds.get_max()[0]) * cell_size + border_end; + + cairo_surface_t* surface = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, canvas_width, canvas_height); + cairo_t* cr = cairo_create(surface); + + cairo_select_font_face(cr, "sans", CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); + cairo_set_font_size(cr, 12); + + cairo_set_source_rgb(cr, 0, 0, 0); + cairo_set_line_width(cr, 1); + for(int i = 0; i < static_cast(bounds.get_max()[1]) + 1; ++i) { + const auto x = border_start + 2 * inset + i * cell_size; + cairo_move_to(cr, static_cast(x) - 0.5f, text_margin); + cairo_line_to(cr, static_cast(x) - 0.5f, ruler_width); + cairo_stroke(cr); + const auto label = fmt::format("{}", i); + cairo_text_extents_t te; + cairo_text_extents(cr, label.c_str(), &te); + cairo_move_to(cr, x + text_margin, text_margin + te.height); + cairo_show_text(cr, label.c_str()); + } + for(int i = 0; i < static_cast(bounds.get_max()[0]) + 1; ++i) { + const auto y = border_start + 2 * inset + i * cell_size; + cairo_move_to(cr, text_margin, static_cast(y) - 0.5f); + cairo_line_to(cr, ruler_width, static_cast(y) - 0.5f); + 
cairo_stroke(cr); + const auto label = fmt::format("{}", i); + cairo_text_extents_t te; + cairo_text_extents(cr, label.c_str(), &te); + cairo_move_to(cr, text_margin, y + te.height + text_margin); + cairo_show_text(cr, label.c_str()); + } + + cairo_set_operator(cr, CAIRO_OPERATOR_HSL_HUE); + for(size_t i = 0; i < boxes.size(); ++i) { + const auto hue = static_cast(i) / static_cast(boxes.size()); + const auto [r, g, b] = hsl2rgb(hue, 0.8f, 0.6f); + cairo_set_source_rgb(cr, r, g, b); + const auto sr = static_cast>(boxes[i]); + const auto x = border_start + 2 * inset + static_cast(sr.offset[1]) * cell_size; + const auto y = border_start + 2 * inset + static_cast(sr.offset[0]) * cell_size; + const auto w = static_cast(sr.range[1]) * cell_size - 2 * inset; + const auto h = static_cast(sr.range[0]) * cell_size - 2 * inset; + cairo_rectangle(cr, x, y, w, h); + cairo_fill(cr); + } + + cairo_set_source_rgb(cr, 1.0, 1.0, 1.0); + cairo_rectangle(cr, 0, 0, canvas_width, canvas_height); + cairo_set_operator(cr, CAIRO_OPERATOR_DEST_OVER); + cairo_fill(cr); + + cairo_destroy(cr); + + const auto test_name = Catch::getResultCapture().getCurrentTestName(); + const auto image_name = fmt::format("{}-{}.png", std::regex_replace(test_name, std::regex("[^a-zA-Z0-9]+"), "-"), suffix); + cairo_surface_write_to_png(surface, image_name.c_str()); + cairo_surface_destroy(surface); +#else + (void)boxes; +#endif +} + +} diff --git a/test/grid_test_utils.h b/test/grid_test_utils.h new file mode 100644 index 000000000..01413089a --- /dev/null +++ b/test/grid_test_utils.h @@ -0,0 +1,22 @@ +#include "grid.h" +#include "test_utils.h" + +namespace celerity::test_utils { + +struct partition_vector_order { + template + bool operator()(const std::vector>& lhs, const std::vector>& rhs) { + if(lhs.size() < rhs.size()) return true; + if(lhs.size() > rhs.size()) return false; + constexpr detail::box_coordinate_order box_order; + for(size_t i = 0; i < lhs.size(); ++i) { + if(box_order(lhs[i], rhs[i])) return 
true; + if(box_order(rhs[i], lhs[i])) return false; + } + return false; + } +}; + +void render_boxes(const std::vector>& boxes, const std::string_view suffix = "region"); + +} diff --git a/test/grid_tests.cc b/test/grid_tests.cc new file mode 100644 index 000000000..43d89f5e5 --- /dev/null +++ b/test/grid_tests.cc @@ -0,0 +1,374 @@ +#include "grid_test_utils.h" + +#include +#include +#include +#include +#include + +#include +#include + +using namespace celerity; +using namespace celerity::detail; + +TEST_CASE("split_box dissects boxes as expected - 3d", "[grid]") { + const box<3> input_box{{0, 0, 0}, {7, 9, 5}}; + const std::vector> cuts{ + {0, 4, 8, 12}, + {8, 9}, + }; + std::vector> expected{ + {{0, 0, 0}, {4, 8, 5}}, + {{0, 8, 0}, {4, 9, 5}}, + {{4, 0, 0}, {7, 8, 5}}, + {{4, 8, 0}, {7, 9, 5}}, + }; + + std::vector> split; + grid_detail::dissect_box(input_box, cuts, split, 0); + + std::sort(split.begin(), split.end(), box_coordinate_order()); + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + CHECK(split == expected); +} + +template +void test_directional_merge(std::vector> unmerged, std::vector> merged) { + CAPTURE(MergeDim); + std::minstd_rand rng(42); + std::shuffle(unmerged.begin(), unmerged.end(), rng); + CAPTURE(unmerged); + auto test = unmerged; + test.erase(grid_detail::merge_connected_boxes_along_dim(test.begin(), test.end()), test.end()); + std::sort(test.begin(), test.end(), box_coordinate_order()); + std::sort(merged.begin(), merged.end(), box_coordinate_order()); + CHECK(test == merged); +} + +TEST_CASE("directional merge of non-overlapping boxes - 1d", "[grid]") { + const std::vector> unmerged{ + {{0}, {2}}, + {{2}, {4}}, + {{4}, {8}}, + {{10}, {12}}, + }; + const std::vector> merged{ + {{0}, {8}}, + {{10}, {12}}, + }; + test_directional_merge<0>(unmerged, merged); +} + +TEST_CASE("directional merge of overlapping boxes - 1d", "[grid]") { + const std::vector> unmerged{ + {{0}, {6}}, + {{2}, {4}}, + {{8}, {12}}, + {{10}, {16}}, 
+ {{16}, {18}}, + }; + const std::vector> merged{ + {{0}, {6}}, + {{8}, {18}}, + }; + test_directional_merge<0>(unmerged, merged); +} + +TEST_CASE("directional merge of non-overlapping boxes - 2d", "[grid]") { + const std::vector> unmerged{ + {{0, 0}, {2, 2}}, + {{0, 2}, {2, 4}}, + {{0, 4}, {2, 6}}, + {{2, 2}, {4, 4}}, + {{2, 4}, {4, 6}}, + {{2, 6}, {4, 8}}, + {{4, 4}, {6, 6}}, + {{4, 6}, {6, 8}}, + {{4, 8}, {6, 10}}, + }; + + const std::vector> merged_dim0{ + {{0, 0}, {2, 2}}, + {{0, 2}, {4, 4}}, + {{0, 4}, {6, 6}}, + {{2, 6}, {6, 8}}, + {{4, 8}, {6, 10}}, + }; + test_directional_merge<0>(unmerged, merged_dim0); + + const std::vector> merged_dim1{ + {{0, 0}, {2, 6}}, + {{2, 2}, {4, 8}}, + {{4, 4}, {6, 10}}, + }; + test_directional_merge<1>(unmerged, merged_dim1); + + test_utils::render_boxes(unmerged, "unmerged"); + test_utils::render_boxes(merged_dim0, "merged-dim0"); + test_utils::render_boxes(merged_dim1, "merged-dim1"); +} + +TEST_CASE("directional merge of overlapping boxes - 2d", "[grid]") { + const std::vector> unmerged{ + {{0, 0}, {12, 3}}, + {{0, 1}, {12, 4}}, + {{0, 4}, {12, 6}}, + {{0, 8}, {12, 10}}, + {{0, 0}, {3, 12}}, + {{1, 0}, {4, 12}}, + {{4, 0}, {6, 12}}, + {{8, 0}, {10, 12}}, + }; + + const std::vector> merged_dim0{ + {{0, 0}, {12, 3}}, + {{0, 1}, {12, 4}}, + {{0, 4}, {12, 6}}, + {{0, 8}, {12, 10}}, + {{0, 0}, {6, 12}}, + {{8, 0}, {10, 12}}, + }; + test_directional_merge<0>(unmerged, merged_dim0); + + const std::vector> merged_dim1{ + {{0, 0}, {12, 6}}, + {{0, 8}, {12, 10}}, + {{0, 0}, {3, 12}}, + {{1, 0}, {4, 12}}, + {{4, 0}, {6, 12}}, + {{8, 0}, {10, 12}}, + }; + test_directional_merge<1>(unmerged, merged_dim1); + + test_utils::render_boxes(unmerged, "unmerged"); + test_utils::render_boxes(merged_dim0, "merged-dim0"); + test_utils::render_boxes(merged_dim1, "merged-dim1"); +} + +TEST_CASE("directional merge of non-overlapping 3d boxes", "[grid]") { + const std::vector> unmerged{ + {{0, 0, 2}, {2, 2, 4}}, + {{0, 2, 0}, {2, 4, 2}}, + {{0, 2, 2}, 
{2, 4, 4}}, + {{2, 0, 0}, {4, 2, 2}}, + {{2, 0, 2}, {4, 2, 4}}, + {{2, 2, 0}, {4, 4, 2}}, + {{2, 2, 2}, {4, 4, 4}}, + }; + + const std::vector> merged_dim0{ + {{0, 0, 2}, {4, 2, 4}}, + {{0, 2, 0}, {4, 4, 2}}, + {{0, 2, 2}, {4, 4, 4}}, + {{2, 0, 0}, {4, 2, 2}}, + }; + test_directional_merge<0>(unmerged, merged_dim0); + + const std::vector> merged_dim1{ + {{0, 2, 0}, {2, 4, 2}}, + {{0, 0, 2}, {2, 4, 4}}, + {{2, 0, 0}, {4, 4, 2}}, + {{2, 0, 2}, {4, 4, 4}}, + }; + test_directional_merge<1>(unmerged, merged_dim1); + + const std::vector> merged_dim2{ + {{0, 0, 2}, {2, 2, 4}}, + {{0, 2, 0}, {2, 4, 4}}, + {{2, 0, 0}, {4, 2, 4}}, + {{2, 2, 0}, {4, 4, 4}}, + }; + test_directional_merge<2>(unmerged, merged_dim2); +} + +TEST_CASE("region normalization removes overlaps - 2d", "[grid]") { + const std::vector> overlapping{ + {{0, 0}, {4, 4}}, + {{2, 2}, {6, 6}}, + {{4, 8}, {5, 9}}, + }; + std::vector> normalized{ + {{0, 0}, {2, 4}}, + {{2, 0}, {4, 6}}, + {{4, 2}, {6, 6}}, + {{4, 8}, {5, 9}}, + }; + + const auto result = grid_detail::normalize(std::vector(overlapping)); + std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); + CHECK(result == normalized); + + test_utils::render_boxes(overlapping, "input"); + test_utils::render_boxes(result, "result"); + test_utils::render_boxes(normalized, "normalized"); +} + +TEST_CASE("region normalization maximizes extent of fast dimensions - 2d", "[grid]") { + const std::vector> input{ + {{0, 0}, {8, 2}}, + {{0, 2}, {2, 4}}, + {{6, 2}, {8, 4}}, + {{0, 4}, {8, 6}}, + }; + std::vector> normalized{ + {{0, 0}, {2, 6}}, + {{2, 0}, {6, 2}}, + {{2, 4}, {6, 6}}, + {{6, 0}, {8, 6}}, + }; + + const auto result = grid_detail::normalize(std::vector(input)); + std::sort(normalized.begin(), normalized.end(), box_coordinate_order()); + CHECK(result == normalized); + + test_utils::render_boxes(input, "input"); + test_utils::render_boxes(result, "result"); + test_utils::render_boxes(normalized, "normalized"); +} + +TEST_CASE("region union - 
2d", "[grid]") { + const region<2> ra{{ + {{0, 0}, {3, 3}}, + {{4, 0}, {7, 3}}, + {{0, 7}, {1, 9}}, + {{4, 7}, {6, 9}}, + }}; + const region<2> rb{{ + {{2, 3}, {5, 6}}, + {{6, 3}, {9, 6}}, + {{1, 7}, {2, 9}}, + {{4, 7}, {6, 9}}, + }}; + + std::vector> expected{ + {{0, 0}, {2, 3}}, + {{2, 0}, {3, 6}}, + {{3, 3}, {4, 6}}, + {{4, 0}, {5, 6}}, + {{5, 0}, {6, 3}}, + {{6, 0}, {7, 6}}, + {{7, 3}, {9, 6}}, + {{0, 7}, {2, 9}}, + {{4, 7}, {6, 9}}, + }; + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + + const auto result = region_union(ra, rb); + CHECK(result.get_boxes() == expected); + + test_utils::render_boxes(ra.get_boxes(), "ra"); + test_utils::render_boxes(rb.get_boxes(), "rb"); + test_utils::render_boxes(expected, "expected"); + test_utils::render_boxes(result.get_boxes(), "result"); +} + +TEST_CASE("region intersection - 2d", "[grid]") { + const region<2> ra{{ + {{2, 2}, {6, 6}}, + {{6, 2}, {8, 4}}, + {{8, 0}, {9, 4}}, + {{0, 12}, {3, 14}}, + {{2, 9}, {4, 11}}, + }}; + const region<2> rb{{ + {{3, 4}, {7, 8}}, + {{7, 1}, {8, 4}}, + {{8, 2}, {9, 5}}, + {{2, 9}, {3, 14}}, + }}; + + std::vector> expected{ + {{3, 4}, {6, 6}}, + {{7, 2}, {9, 4}}, + {{2, 9}, {3, 11}}, + {{2, 12}, {3, 14}}, + }; + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + + const auto result = region_intersection(ra, rb); + CHECK(result.get_boxes() == expected); + + test_utils::render_boxes(ra.get_boxes(), "ra"); + test_utils::render_boxes(rb.get_boxes(), "rb"); + test_utils::render_boxes(expected, "expected"); + test_utils::render_boxes(result.get_boxes(), "result"); +} + +TEST_CASE("region difference - 2d", "[grid]") { + const region<2> ra{{ + {{0, 0}, {6, 6}}, + {{1, 8}, {4, 11}}, + {{8, 2}, {10, 4}}, + }}; + const region<2> rb{{ + {{1, 1}, {3, 3}}, + {{2, 2}, {4, 4}}, + {{0, 9}, {2, 12}}, + {{4, 11}, {6, 13}}, + {{7, 1}, {11, 5}}, + }}; + + std::vector> expected{ + {{0, 0}, {1, 6}}, + {{1, 0}, {3, 1}}, + {{3, 0}, {4, 2}}, + {{1, 3}, {2, 6}}, + {{2, 4}, 
{4, 6}}, + {{4, 0}, {6, 6}}, + {{1, 8}, {2, 9}}, + {{2, 8}, {4, 11}}, + }; + std::sort(expected.begin(), expected.end(), box_coordinate_order()); + + const auto result = region_difference(ra, rb); + CHECK(result.get_boxes() == expected); + + test_utils::render_boxes(ra.get_boxes(), "ra"); + test_utils::render_boxes(rb.get_boxes(), "rb"); + test_utils::render_boxes(expected, "expected"); + test_utils::render_boxes(result.get_boxes(), "result"); +} + +TEST_CASE("region normalization - 0d", "[grid]") { + std::vector> r; + auto n = r; + CHECK(grid_detail::normalize(std::vector(r)).empty()); + r.emplace_back(); + CHECK(grid_detail::normalize(std::vector(r)) == std::vector{{box<0>()}}); + r.emplace_back(); + CHECK(grid_detail::normalize(std::vector(r)) == std::vector{{box<0>()}}); +} + +TEST_CASE("region union - 0d", "[grid]") { + region<0> empty; + CHECK(empty.empty()); + region<0> unit{{box<0>{}}}; + CHECK(!unit.empty()); + CHECK(region_union(empty, empty).empty()); + CHECK(!region_union(empty, unit).empty()); + CHECK(!region_union(unit, empty).empty()); + CHECK(!region_union(unit, unit).empty()); +} + +TEST_CASE("region intersection - 0d", "[grid]") { + region<0> empty; + CHECK(empty.empty()); + region<0> unit{{box<0>{}}}; + CHECK(!unit.empty()); + CHECK(region_intersection(empty, empty).empty()); + CHECK(region_intersection(empty, unit).empty()); + CHECK(region_intersection(unit, empty).empty()); + CHECK(!region_intersection(unit, unit).empty()); +} + +TEST_CASE("region difference - 0d", "[grid]") { + region<0> empty; + CHECK(empty.empty()); + region<0> unit{{box<0>{}}}; + CHECK(!unit.empty()); + CHECK(region_difference(empty, empty).empty()); + CHECK(region_difference(empty, unit).empty()); + CHECK(!region_difference(unit, empty).empty()); + CHECK(region_difference(unit, unit).empty()); +} diff --git a/test/integration/backend.cc b/test/integration/backend.cc index 0e3a8d09a..b57463d0d 100644 --- a/test/integration/backend.cc +++ b/test/integration/backend.cc @@ 
-6,12 +6,32 @@ std::abort(); \ } +template +celerity::range truncate_range(const celerity::range<3>& r3) { + celerity::range r = celerity::detail::zero_range; + for(int d = 0; d < Dims; ++d) { + r[d] = r3[d]; + } + return r; +} + +template +celerity::subrange truncate_subrange(const celerity::subrange<3>& sr3) { + celerity::subrange sr; + for(int d = 0; d < Dims; ++d) { + sr.offset[d] = sr3.offset[d]; + sr.range[d] = sr3.range[d]; + } + return sr; +} + + template struct kernel_name {}; template void test_copy(celerity::distr_queue& q) { - celerity::buffer buf(celerity::detail::range_cast(celerity::range<3>{5, 7, 9})); + celerity::buffer buf(truncate_range({5, 7, 9})); // Initialize on device q.submit([&](celerity::handler& cgh) { @@ -20,8 +40,8 @@ void test_copy(celerity::distr_queue& q) { }); // Check and modify partially on host - const auto sr = celerity::detail::subrange_cast(celerity::subrange<3>{{1, 2, 3}, {3, 4, 5}}); - const auto sr3 = celerity::detail::subrange_cast<3>(sr); + const auto sr3 = celerity::subrange<3>{{1, 2, 3}, {3, 4, 5}}; + const auto sr = truncate_subrange({{1, 2, 3}, {3, 4, 5}}); q.submit([&](celerity::handler& cgh) { celerity::accessor acc{buf, cgh, celerity::access::fixed{sr}, celerity::read_write_host_task}; cgh.host_task(celerity::on_master_node, [=]() { diff --git a/test/print_graph_tests.cc b/test/print_graph_tests.cc index 1089b9229..dd0f8cd7f 100644 --- a/test/print_graph_tests.cc +++ b/test/print_graph_tests.cc @@ -43,11 +43,11 @@ TEST_CASE("task-graph printing is unchanged", "[print_graph][task-graph]") { // replace the `expected` value with the new dot graph. const std::string expected = "digraph G {label=\"Task Graph\" 0[shape=ellipse label=epoch>];1[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
discard_write B1 {[[0,0,0] - [1,1,1]]}>];0->1[color=orchid];2[shape=box style=rounded " - "label=device-compute [0,0,0] - [64,1,1]
discard_write B0 {[[0,0,0] - " - "[64,1,1]]}>];0->2[color=orchid];3[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
(R1) " - "read_write B1 {[[0,0,0] - [1,1,1]]}
read B0 {[[0,0,0] - [64,1,1]]}>];1->3[];2->3[];4[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
read B1 {[[0,0,0] - [1,1,1]]}>];3->4[];}"; + "
device-compute [0,0,0] - [64,1,1]
discard_write B1 {[0,0,0] - [1,1,1]}>];0->1[color=orchid];2[shape=box style=rounded " + "label=device-compute [0,0,0] - [64,1,1]
discard_write B0 {[0,0,0] - " + "[64,1,1]}>];0->2[color=orchid];3[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
(R1) " + "read_write B1 {[0,0,0] - [1,1,1]}
read B0 {[0,0,0] - [64,1,1]}>];1->3[];2->3[];4[shape=box style=rounded label=device-compute [0,0,0] - [64,1,1]
read B1 {[0,0,0] - [1,1,1]}>];3->4[];}"; CHECK(print_task_graph(tt.trec) == expected); } diff --git a/test/region_map_tests.cc b/test/region_map_tests.cc index 4943d26f9..57ca83c1a 100644 --- a/test/region_map_tests.cc +++ b/test/region_map_tests.cc @@ -19,12 +19,12 @@ using namespace celerity; using namespace celerity::detail; -template +template using region_map_impl = region_map_detail::region_map_impl; namespace celerity::detail { struct region_map_testspy { - template + template static void traverse(const region_map_impl& rm, const Callback& cb) { auto recurse = [&cb](auto& node, const size_t level, auto& r) -> void { for(size_t i = 0; i < node.m_child_boxes.size(); ++i) { @@ -39,44 +39,44 @@ struct region_map_testspy { recurse(*rm.m_root, 0, recurse); } - template + template static size_t get_num_leaf_nodes(const region_map_impl& rm) { size_t num_leaf_nodes = 0; - traverse(rm, [&num_leaf_nodes]( - const size_t /* level */, const GridBox& /* box */, const std::optional& value, const size_t /* num_children */) { - if(value.has_value()) { num_leaf_nodes++; } - }); + traverse(rm, + [&num_leaf_nodes](const size_t /* level */, const box& /* box */, const std::optional& value, const size_t /* num_children */) { + if(value.has_value()) { num_leaf_nodes++; } + }); return num_leaf_nodes; } - template + template static size_t get_depth(const region_map_impl& rm) { size_t depth = 1; - traverse(rm, [&depth](const size_t level, const GridBox& /* box */, const std::optional& /* value */, - const size_t /* num_children */) { depth = std::max(depth, level + 1); }); + traverse(rm, [&depth](const size_t level, const box& /* box */, const std::optional& /* value */, const size_t /* num_children */) { + depth = std::max(depth, level + 1); + }); return depth; } - template + template static double compute_overlap(const region_map_impl& rm) { - std::vector>> boxes_by_level; - traverse( - rm, [&boxes_by_level](const size_t level, const GridBox& box, const std::optional& /* value */, 
const size_t /* num_children */) { - while(boxes_by_level.size() < level + 1) { - boxes_by_level.push_back({}); - } - boxes_by_level[level].push_back(box); - }); + std::vector>> boxes_by_level; + traverse(rm, [&boxes_by_level](const size_t level, const box& box, const std::optional& /* value */, const size_t /* num_children */) { + while(boxes_by_level.size() < level + 1) { + boxes_by_level.push_back({}); + } + boxes_by_level[level].push_back(box); + }); const size_t num_levels = boxes_by_level.size(); - std::vector> box_union_by_level(num_levels, GridRegion{}); + std::vector> box_union_by_level(num_levels, region{}); size_t total_overlap_area = 0; for(size_t l = 0; l < num_levels; ++l) { size_t overlap = 0; for(auto& b : boxes_by_level[l]) { - overlap += GridRegion::intersect(box_union_by_level[l], b).area(); - box_union_by_level[l] = GridRegion::merge(box_union_by_level[l], b); + overlap += region_intersection(box_union_by_level[l], b).get_area(); + box_union_by_level[l] = region_union(box_union_by_level[l], b); } total_overlap_area += overlap; @@ -88,20 +88,20 @@ struct region_map_testspy { } // We return a percentage value of how much area in the entire rm is overlapping (this may exceed 1) - return static_cast(total_overlap_area) / (rm.m_extent.area() * num_levels); + return static_cast(total_overlap_area) / (rm.m_extent.get_area() * num_levels); } - template - static void erase(region_map_impl& rm, const GridBox& box) { + template + static void erase(region_map_impl& rm, const box& box) { rm.erase(box); } - template - static void insert(region_map_impl& rm, const GridBox& box, const ValueType& value) { + template + static void insert(region_map_impl& rm, const box& box, const ValueType& value) { rm.insert(box, value); } - template + template static void try_merge(region_map_impl& rm, std::vector::types::entry> candidates) { rm.try_merge(std::move(candidates)); } @@ -126,7 +126,7 @@ void draw(const region_map_impl& rm) { cairo_select_font_face(cr, "sans", 
CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_NORMAL); cairo_set_font_size(cr, 10.0); - region_map_testspy::traverse(rm, [&](const size_t level, const GridBox<2>& box, const std::optional& value, const size_t num_children) { + region_map_testspy::traverse(rm, [&](const size_t level, const box<2>& box, const std::optional& value, const size_t num_children) { const auto min = box.get_min(); const auto max = box.get_max(); const float inset = 3.f; @@ -183,7 +183,7 @@ void draw(const region_map_impl& rm) { TEST_CASE("region_map::try_merge does not attempt to merge intermediate results that no longer exist", "[region_map]") { region_map_impl rm({99, 99}, -1); - std::vector, int>> entries = { + std::vector, int>> entries = { // These first three entries will be merged {{{0, 0}, {33, 66}}, 1}, {{{33, 0}, {66, 66}}, 1}, @@ -215,7 +215,7 @@ TEST_CASE("region_map::try_merge does not attempt to merge intermediate results } while(0) TEST_CASE("region_map can be moved", "[region_map]") { - constexpr int64_t size = 128; + constexpr size_t size = 128; const int default_value = -1; region_map_impl rm1{{size}, default_value}; rm1.update_box({0, size}, 1337); @@ -243,9 +243,9 @@ TEST_CASE("region_map handles basic operations in 0D", "[region_map]") { } TEST_CASE("region_map handles basic operations in 1D", "[region_map]") { - constexpr int64_t size = 128; - const int default_value = -1; - region_map_impl rm{{size}, default_value}; + constexpr size_t size = 128; + const size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{size}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({0, size}); @@ -276,22 +276,22 @@ TEST_CASE("region_map handles basic operations in 1D", "[region_map]") { } SECTION("update multiple") { - constexpr int num_parts = 16; - constexpr int slice = size / num_parts; + constexpr size_t num_parts = 16; + constexpr size_t slice = size / num_parts; // Iteratively split line into multiple parts - for(int64_t i = 
0; i < num_parts; ++i) { - rm.update_box(GridBox<1>{i * slice, i * slice + slice}, static_cast(i)); + for(size_t i = 0; i < num_parts; ++i) { + rm.update_box(box<1>{i * slice, i * slice + slice}, i); const auto results = rm.get_region_values({0, size}); REQUIRE_LOOP(results.size() == static_cast(i + (i < (num_parts - 1) ? 2 : 1))); - for(int64_t j = 0; j < i + 1; ++j) { + for(size_t j = 0; j < i + 1; ++j) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, slice](auto& r) { - return r == std::pair{GridBox<1>{j * slice, j * slice + slice}, static_cast(j)}; + return r == std::pair{box<1>{j * slice, j * slice + slice}, j}; })); } if(i < num_parts - 1) { // Check that original value still exists REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, slice](auto& r) { - return r == std::pair{GridBox<1>{(i + 1) * slice, size}, -1}; + return r == std::pair{box<1>{(i + 1) * slice, size}, std::numeric_limits::max()}; })); } } @@ -299,10 +299,10 @@ TEST_CASE("region_map handles basic operations in 1D", "[region_map]") { } TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { - constexpr int64_t height = 128; - constexpr int64_t width = 192; - constexpr int default_value = -1; - region_map_impl rm{{height, width}, default_value}; + constexpr size_t height = 128; + constexpr size_t width = 192; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{height, width}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({{0, 0}, {height, width}}); @@ -346,71 +346,70 @@ TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { } SECTION("update multiple") { - constexpr int num_rows = 16; - constexpr int row_height = height / num_rows; + constexpr size_t num_rows = 16; + constexpr size_t row_height = height / num_rows; // Iteratively split domain into multiple rows - for(int64_t i = 0; i < num_rows; ++i) { - rm.update_box(GridBox<2>{{i * row_height, 0}, {i * 
row_height + row_height, width}}, static_cast(i)); + for(size_t i = 0; i < num_rows; ++i) { + rm.update_box(box<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, i); const auto results = rm.get_region_values({{0, 0}, {height, width}}); // Until the last iteration we have to account for the original value. REQUIRE_LOOP(results.size() == static_cast(i + (i < (num_rows - 1) ? 2 : 1))); - for(int64_t j = 0; j < i + 1; ++j) { + for(size_t j = 0; j < i + 1; ++j) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, row_height](auto& r) { - return r == std::pair{GridBox<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, static_cast(j)}; + return r == std::pair{box<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, j}; })); } if(i < num_rows - 1) { // Check that original value still exists CHECK(std::any_of(results.begin(), results.end(), [i, row_height, default_value](auto& r) { - return r == std::pair{GridBox<2>{{(i + 1) * row_height, 0}, {height, width}}, default_value}; + return r == std::pair{box<2>{{(i + 1) * row_height, 0}, {height, width}}, default_value}; })); } } // Now drive a center column through all of them - rm.update_box(GridBox<2>{{0, 48}, {height, 80}}, -2); + rm.update_box(box<2>{{0, 48}, {height, 80}}, std::numeric_limits::max() - 2); const auto results = rm.get_region_values({{0, 0}, {height, width}}); - CHECK(std::any_of(results.begin(), results.end(), [](auto& r) { return r == std::pair{GridBox<2>{{0, 48}, {height, 80}}, -2}; })); + CHECK(std::any_of(results.begin(), results.end(), [](auto& r) { + return r == std::pair{box<2>{{0, 48}, {height, 80}}, std::numeric_limits::max() - 2}; + })); - for(int64_t i = 0; i < num_rows; ++i) { + for(size_t i = 0; i < num_rows; ++i) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, row_height](auto& r) { - return r == std::pair{GridBox<2>{{i * row_height, 0}, {i * row_height + row_height, 48}}, static_cast(i)}; + return r == std::pair{box<2>{{i * 
row_height, 0}, {i * row_height + row_height, 48}}, i}; })); REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, row_height](auto& r) { - return r == std::pair{GridBox<2>{{i * row_height, 80}, {i * row_height + row_height, width}}, static_cast(i)}; + return r == std::pair{box<2>{{i * row_height, 80}, {i * row_height + row_height, width}}, i}; })); } } SECTION("update growing from two sides") { - constexpr int num_rows = 16; - constexpr int row_height = height / num_rows; + constexpr size_t num_rows = 16; + constexpr size_t row_height = height / num_rows; // Iteratively split domain into multiple rows, working inwards from two sides - for(int64_t i = 0; i < num_rows / 2; ++i) { - rm.update_box(GridBox<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, static_cast(i)); - rm.update_box( - GridBox<2>{{(num_rows - 1 - i) * row_height, 0}, {(num_rows - 1 - i) * row_height + row_height, width}}, num_rows + static_cast(i)); + for(size_t i = 0; i < num_rows / 2; ++i) { + rm.update_box(box<2>{{i * row_height, 0}, {i * row_height + row_height, width}}, i); + rm.update_box(box<2>{{(num_rows - 1 - i) * row_height, 0}, {(num_rows - 1 - i) * row_height + row_height, width}}, num_rows + i); const auto results = rm.get_region_values({{0, 0}, {height, width}}); // Until the last iteration we have to account for the original value. - REQUIRE_LOOP(results.size() == static_cast(2 * (i + 1) + (i < (num_rows / 2 - 1) ? 1 : 0))); + REQUIRE_LOOP(results.size() == 2 * (i + 1) + (i < (num_rows / 2 - 1) ? 
1 : 0)); - for(int64_t j = 0; j < i + 1; ++j) { + for(size_t j = 0; j < i + 1; ++j) { REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, row_height](auto& r) { - return r == std::pair{GridBox<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, static_cast(j)}; + return r == std::pair{box<2>{{j * row_height, 0}, {j * row_height + row_height, width}}, j}; })); REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [j, row_height, num_rows](auto& r) { - return r - == std::pair{GridBox<2>{{(num_rows - 1 - j) * row_height, 0}, {(num_rows - 1 - j) * row_height + row_height, width}}, - num_rows + static_cast(j)}; + return r == std::pair{box<2>{{(num_rows - 1 - j) * row_height, 0}, {(num_rows - 1 - j) * row_height + row_height, width}}, num_rows + j}; })); } if(i < num_rows / 2 - 1) { // Check that original value still exists REQUIRE_LOOP(std::any_of(results.begin(), results.end(), [i, row_height, num_rows, default_value](auto& r) { - return r == std::pair{GridBox<2>{{(i + 1) * row_height, 0}, {(num_rows - 1 - i) * row_height, width}}, default_value}; + return r == std::pair{box<2>{{(i + 1) * row_height, 0}, {(num_rows - 1 - i) * row_height, width}}, default_value}; })); } } @@ -418,15 +417,15 @@ TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { // TODO: Also in 1D/3D? 
SECTION("update boxes random order") { - std::vector, int>> update_boxes; - int x = 100; - constexpr int box_height = height / 16; - constexpr int box_width = width / 16; - for(int64_t i = 0; i < 16; ++i) { - for(int64_t j = 0; j < 16; ++j) { - const GridPoint<2> min = {i * box_height, j * box_width}; - const GridPoint<2> max = min + GridPoint<2>{box_height, box_width}; - update_boxes.push_back(std::pair{GridBox<2>{min, max}, x++}); + std::vector, size_t>> update_boxes; + size_t x = 100; + constexpr size_t box_height = height / 16; + constexpr size_t box_width = width / 16; + for(size_t i = 0; i < 16; ++i) { + for(size_t j = 0; j < 16; ++j) { + const id<2> min = {i * box_height, j * box_width}; + const id<2> max = min + id<2>{box_height, box_width}; + update_boxes.push_back(std::pair{box<2>{min, max}, x++}); } } std::mt19937 g(123); @@ -447,11 +446,11 @@ TEST_CASE("region_map handles basic operations in 2D", "[region_map]") { } TEST_CASE("region_map handles basic operations in 3D", "[region_map]") { - constexpr int64_t depth = 128; - constexpr int64_t height = 192; - constexpr int64_t width = 256; - constexpr int default_value = -1; - region_map_impl rm{{depth, height, width}, default_value}; + constexpr size_t depth = 128; + constexpr size_t height = 192; + constexpr size_t width = 256; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{depth, height, width}, default_value}; SECTION("query default value") { const auto results = rm.get_region_values({{0, 0, 0}, {depth, height, width}}); @@ -511,30 +510,31 @@ TEST_CASE("region_map handles basic operations in 3D", "[region_map]") { } TEMPLATE_TEST_CASE_SIG("region_map updates get clamped to extent", "[region_map]", ((int Dims), Dims), 1, 2, 3) { - const auto extent = range_cast(range<3>(64, 96, 128)); - const auto full_box = GridBox<3>{{0, 0, 0}, {64, 96, 128}}; - region_map_impl rm{extent, 0}; + const auto extent = test_utils::truncate_range({64, 96, 128}); + const auto full_box = 
test_utils::truncate_box({{0, 0, 0}, {64, 96, 128}}); + region_map_impl rm{extent, 0}; - const auto exceeding_box = region_map_detail::box_cast(GridBox<3>({-32, -16, -8}, {72, 102, 136})); + // TODO boxes based on ids cannot be negative, so we cannot test clamping of the minimum at the moment + const auto exceeding_box = box({}, test_utils::truncate_range({72, 102, 136})); rm.update_box(exceeding_box, 1337); const auto results = rm.get_region_values(exceeding_box); - CHECK_RESULTS(results, {region_map_detail::box_cast(full_box), 1337}); + CHECK_RESULTS(results, {full_box, 1337}); } // This doesn't test anything in paticular, more of a smoke test. TEST_CASE("region_map correctly handles complex queries", "[region_map]") { - region_map_impl rm{{5, 9}, 99999}; + region_map_impl rm{{5, 9}, 99999}; - const std::initializer_list> data = {{{0, 0}, {2, 3}}, {{2, 0}, {5, 2}}, {{2, 2}, {5, 3}}, {{0, 3}, {3, 4}}, {{3, 3}, {4, 4}}, {{4, 3}, {5, 4}}, + const std::initializer_list> data = {{{0, 0}, {2, 3}}, {{2, 0}, {5, 2}}, {{2, 2}, {5, 3}}, {{0, 3}, {3, 4}}, {{3, 3}, {4, 4}}, {{4, 3}, {5, 4}}, {{0, 4}, {1, 9}}, {{1, 4}, {3, 9}}, {{3, 4}, {5, 6}}, {{3, 6}, {5, 7}}, {{3, 7}, {4, 9}}, {{4, 7}, {5, 9}}}; for(size_t i = 0; i < data.size(); ++i) { - rm.update_box(*(data.begin() + i), static_cast(i)); + rm.update_box(*(data.begin() + i), i); } SECTION("query single boxes") { - const auto query_and_check = [&](const GridBox<2>& box, int expected) { + const auto query_and_check = [&](const box<2>& box, size_t expected) { const auto results = rm.get_region_values(box); REQUIRE(results.size() == 1); CHECK(results[0] == std::pair{box, expected}); @@ -552,7 +552,7 @@ TEST_CASE("region_map correctly handles complex queries", "[region_map]") { } SECTION("query overlapping") { - const auto query_and_check = [&](const GridBox<2>& box, const std::vector, int>>& expected) { + const auto query_and_check = [&](const box<2>& box, const std::vector, size_t>>& expected) { const auto results = 
rm.get_region_values(box); CHECK(results.size() == expected.size()); for(const auto& e : expected) { @@ -577,9 +577,9 @@ TEST_CASE("region_map correctly handles complex queries", "[region_map]") { } TEST_CASE("region map merges entries with the same value upon update in 1D", "[region_map]") { - constexpr int64_t size = 128; - constexpr int default_value = -1; - region_map_impl rm{{size}, default_value}; + constexpr size_t size = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{size}, default_value}; SECTION("simple merge") { rm.update_box({0, 64}, 3); @@ -598,10 +598,10 @@ TEST_CASE("region map merges entries with the same value upon update in 1D", "[r } TEST_CASE("region map merges entries with the same value upon update in 2D", "[region_map]") { - constexpr int64_t height = 64; - constexpr int64_t width = 128; - constexpr int default_value = -1; - region_map_impl rm{{height, width}, default_value}; + constexpr size_t height = 64; + constexpr size_t width = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{height, width}, default_value}; SECTION("simple merge") { rm.update_box({{0, 0}, {height, 64}}, 3); @@ -622,13 +622,13 @@ TEST_CASE("region map merges entries with the same value upon update in 2D", "[r SECTION("merge cascade") { // Same as before, but ensure that the tree is several levels deep // Start by filling the tree with "horizontal bars" of decreasing length, preventing any merges between them - for(int64_t i = 0; i < height / 2; ++i) { + for(size_t i = 0; i < height / 2; ++i) { rm.update_box({{i * 2, 0}, {i * 2 + 2, width - 2 - i * 2}}, 3); } CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 2 * (height / 2)); // Every bar creates two entries (old and new value) CHECK(region_map_testspy::get_depth(rm) > 2); // Tree should be several levels deep by now // Now update the values of the vertical bars, skip last one to prevent merge - for(int64_t i = 0; i < (height / 2) - 1; 
++i) { + for(size_t i = 0; i < (height / 2) - 1; ++i) { rm.update_box({{i * 2, width - 2 - i * 2}, {height, width - 2 - i * 2 + 2}}, 3); } CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 2 * (height / 2)); // No merges so far @@ -642,11 +642,11 @@ TEST_CASE("region map merges entries with the same value upon update in 2D", "[r } TEST_CASE("region map merges entries with the same value upon update in 3D", "[region_map]") { - constexpr int64_t depth = 64; - constexpr int64_t height = 96; - constexpr int64_t width = 128; - constexpr int default_value = -1; - region_map_impl rm{{depth, height, width}, default_value}; + constexpr size_t depth = 64; + constexpr size_t height = 96; + constexpr size_t width = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{depth, height, width}, default_value}; SECTION("simple merge, quasi 1D") { rm.update_box({{0, 0, 0}, {depth, 64, width}}, 3); @@ -668,10 +668,10 @@ TEST_CASE("region map merges entries with the same value upon update in 3D", "[r // NOTE: Merging on query is not required (or possible) in 1D: All merges will be done on update. TEST_CASE("region_map merges truncated result boxes with the same value upon querying in 2D", "[region_map]") { - constexpr int64_t height = 5; - constexpr int64_t width = 9; - constexpr int default_value = -1; - region_map_impl rm{{height, width}, default_value}; + constexpr size_t height = 5; + constexpr size_t width = 9; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{height, width}, default_value}; SECTION("simple merge") { // Set up in such a way that values cannot be merged upon update @@ -696,19 +696,17 @@ TEST_CASE("region_map merges truncated result boxes with the same value upon que // The exact result is ambiguous depending on how boxes were merged. 
However there should always be 3 CHECK(results.size() == 3); // One is the non-mergeable default-initialized section - CHECK(std::any_of(results.begin(), results.end(), [default_value](auto& r) { - return r == std::pair{GridBox<2>{{3, 3}, {height, width}}, default_value}; - })); + CHECK(std::any_of(results.begin(), results.end(), [default_value](auto& r) { return r == std::pair{box<2>{{3, 3}, {height, width}}, default_value}; })); // The other two are either of these two variants const bool variant_1 = std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{1, 1}, {height, 3}}, 3}; + return r == std::pair{box<2>{{1, 1}, {height, 3}}, size_t(3)}; }) && std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{1, 3}, {3, width}}, 3}; + return r == std::pair{box<2>{{1, 3}, {3, width}}, size_t(3)}; }); const bool variant_2 = std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{1, 1}, {3, width}}, 3}; + return r == std::pair{box<2>{{1, 1}, {3, width}}, size_t(3)}; }) && std::any_of(results.begin(), results.end(), [](auto& r) { - return r == std::pair{GridBox<2>{{3, 1}, {height, 3}}, 3}; + return r == std::pair{box<2>{{3, 1}, {height, 3}}, size_t(3)}; }); CHECK(variant_1 != variant_2); } @@ -717,11 +715,11 @@ TEST_CASE("region_map merges truncated result boxes with the same value upon que } TEST_CASE("region_map merges truncated result boxes with the same value upon querying in 3D", "[region_map]") { - constexpr int64_t depth = 32; - constexpr int64_t height = 64; - constexpr int64_t width = 96; - constexpr int default_value = -1; - region_map_impl rm{{depth, height, width}, default_value}; + constexpr size_t depth = 32; + constexpr size_t height = 64; + constexpr size_t width = 96; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{depth, height, width}, default_value}; SECTION("simple merge") { // Setup in such a way that 
values cannot be merged upon update @@ -739,11 +737,11 @@ TEST_CASE("region_map merges truncated result boxes with the same value upon que } TEST_CASE("region_map supports apply_to_values", "[region_map]") { - constexpr int64_t size = 128; - constexpr int default_value = -1; - region_map_impl rm{{size}, default_value}; + constexpr size_t size = 128; + constexpr size_t default_value = std::numeric_limits::max(); + region_map_impl rm{{size}, default_value}; - const auto query_and_check = [&](const GridBox<1>& box, int expected) { + const auto query_and_check = [&](const box<1>& box, size_t expected) { const auto results = rm.get_region_values(box); CHECK(results.size() == 1); CHECK(results[0] == std::pair{box, expected}); @@ -755,7 +753,7 @@ TEST_CASE("region_map supports apply_to_values", "[region_map]") { rm.update_box({96, size}, 4); SECTION("basic value update") { - rm.apply_to_values([](int v) { return v * v; }); + rm.apply_to_values([](size_t v) { return v * v; }); query_and_check({0, 32}, 1); query_and_check({32, 64}, 4); query_and_check({64, 96}, 9); @@ -764,7 +762,7 @@ TEST_CASE("region_map supports apply_to_values", "[region_map]") { SECTION("same values are merged after update") { CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 4); - rm.apply_to_values([](int v) { return v != 2 ? 42 : 1337; }); + rm.apply_to_values([](size_t v) -> size_t { return v != 2 ? 
42 : 1337; }); CHECK(region_map_testspy::get_num_leaf_nodes(rm) == 3); query_and_check({0, 32}, 42); query_and_check({32, 64}, 1337); @@ -776,22 +774,22 @@ TEST_CASE("region_map supports apply_to_values", "[region_map]") { TEST_CASE("inserting consecutive boxes results in zero overlap", "[region_map][performance]") { const bool row_wise_insert = GENERATE(true, false); - const int64_t height = 64; - const int64_t width = 128; - region_map_impl rm{{height, width}, -1}; + const size_t height = 64; + const size_t width = 128; + region_map_impl rm{{height, width}, std::numeric_limits::max()}; - const int64_t count_sqrt = 4; + const size_t count_sqrt = 4; REQUIRE(height % count_sqrt == 0); REQUIRE(width % count_sqrt == 0); - const auto insert_box = [&](const int64_t i, const int64_t j) { - const GridPoint<2> min = {i * (height / count_sqrt), j * (width / count_sqrt)}; - const GridPoint<2> max = min + GridPoint<2>{height / count_sqrt, width / count_sqrt}; + const auto insert_box = [&](const size_t i, const size_t j) { + const id<2> min = {i * (height / count_sqrt), j * (width / count_sqrt)}; + const id<2> max = min + id<2>{height / count_sqrt, width / count_sqrt}; rm.update_box({min, max}, i * count_sqrt + j); }; - for(int64_t i = 0; i < count_sqrt; ++i) { - for(int64_t j = 0; j < count_sqrt; ++j) { + for(size_t i = 0; i < count_sqrt; ++i) { + for(size_t j = 0; j < count_sqrt; ++j) { if(row_wise_insert) { insert_box(i, j); } else { diff --git a/test/runtime_tests.cc b/test/runtime_tests.cc index be3f39108..9555aea03 100644 --- a/test/runtime_tests.cc +++ b/test/runtime_tests.cc @@ -268,10 +268,10 @@ namespace detail { REQUIRE(bam.get_access_modes(buf_b.get_id()).count(cl::sycl::access::mode::discard_read_write) == 1); const auto reqs_a = bam.get_mode_requirements( buf_a.get_id(), cl::sycl::access::mode::read, tsk->get_dimensions(), {tsk->get_global_offset(), tsk->get_global_size()}, tsk->get_global_size()); - REQUIRE(reqs_a == subrange_to_grid_box(subrange<3>({32, 24, 0}, 
{32, 128, 1}))); + REQUIRE(reqs_a == box(subrange<3>({32, 24, 0}, {32, 128, 1}))); const auto reqs_b = bam.get_mode_requirements(buf_b.get_id(), cl::sycl::access::mode::discard_read_write, tsk->get_dimensions(), {tsk->get_global_offset(), tsk->get_global_size()}, tsk->get_global_size()); - REQUIRE(reqs_b == subrange_to_grid_box(subrange<3>({}, {5, 18, 74}))); + REQUIRE(reqs_b == box(subrange<3>({}, {5, 18, 74}))); } TEST_CASE("buffer_access_map merges multiple accesses with the same mode", "[task][device_compute_task]") { @@ -279,13 +279,13 @@ namespace detail { bam.add_access(0, std::make_unique>>(subrange<2>{{3, 0}, {10, 20}}, cl::sycl::access::mode::read, range<2>{30, 30})); bam.add_access(0, std::make_unique>>(subrange<2>{{10, 0}, {7, 20}}, cl::sycl::access::mode::read, range<2>{30, 30})); const auto req = bam.get_mode_requirements(0, cl::sycl::access::mode::read, 2, subrange<3>({0, 0, 0}, {100, 100, 1}), {100, 100, 1}); - REQUIRE(req == subrange_to_grid_box(subrange<3>({3, 0, 0}, {14, 20, 1}))); + REQUIRE(req == box(subrange<3>({3, 0, 0}, {14, 20, 1}))); } TEST_CASE("tasks gracefully handle get_requirements() calls for buffers they don't access", "[task]") { buffer_access_map bam; const auto req = bam.get_mode_requirements(0, cl::sycl::access::mode::read, 3, subrange<3>({0, 0, 0}, {100, 1, 1}), {100, 1, 1}); - REQUIRE(req == subrange_to_grid_box(subrange<3>({0, 0, 0}, {0, 0, 0}))); + REQUIRE(req == box<3>()); } namespace foo { @@ -614,12 +614,12 @@ namespace detail { distr_queue q; const int n = 3; - const auto global_offset = detail::id_cast(id<3>{4, 5, 6}); + const auto global_offset = test_utils::truncate_id({4, 5, 6}); buffer linear_id{{n, Dims + 1}}; q.submit([&](handler& cgh) { accessor a{linear_id, cgh, celerity::access::all{}, write_only, no_init}; // all RM is sane because runtime_tests runs single-node - cgh.parallel_for>(detail::range_cast(range<3>{n, 1, 1}), global_offset, [=](celerity::item item) { + 
cgh.parallel_for>(detail::range_cast(range<1>{n}), global_offset, [=](celerity::item item) { auto i = (item.get_id() - item.get_offset())[0]; for(int d = 0; d < Dims; ++d) { a[i][d] = item[d]; diff --git a/test/system/distr_tests.cc b/test/system/distr_tests.cc index 1f06ff4bf..e5b8c8d8a 100644 --- a/test/system/distr_tests.cc +++ b/test/system/distr_tests.cc @@ -204,10 +204,10 @@ namespace detail { // Note: We assume a local range size of 165 here, this may not be supported by all devices. - auto global_range = range_cast(range<3>{n * 4 * 3, 3 * 5, 2 * 11}); - auto local_range = range_cast(range<3>{3, 5, 11}); - auto group_range = global_range / local_range; - auto global_offset = id_cast(id<3>{47, 53, 59}); + const auto global_range = test_utils::truncate_range({n * 4 * 3, 3 * 5, 2 * 11}); + const auto local_range = test_utils::truncate_range({3, 5, 11}); + const auto group_range = global_range / local_range; + const auto global_offset = test_utils::truncate_id({47, 53, 59}); buffer geo(global_range); diff --git a/test/test_utils.h b/test/test_utils.h index 789057fa7..eb7f3d8c9 100644 --- a/test/test_utils.h +++ b/test/test_utils.h @@ -13,6 +13,7 @@ #include #endif +#include // for keep_memory() #include #include @@ -360,36 +361,79 @@ namespace test_utils { ~task_test_context() { maybe_print_task_graph(trec); } }; + template + void black_hole(T&& v) { + Catch::Benchmark::keep_memory(&v); + } + + // truncate_*(): unchecked versions of *_cast() with signatures friendly to parameter type inference + + template + range truncate_range(const range<3>& r3) { + static_assert(Dims <= 3); + range r = detail::zero_range; + for(int d = 0; d < Dims; ++d) { + r[d] = r3[d]; + } + return r; + } + + template + id truncate_id(const id<3>& i3) { + static_assert(Dims <= 3); + id i; + for(int d = 0; d < Dims; ++d) { + i[d] = i3[d]; + } + return i; + } + + template + subrange truncate_subrange(const subrange<3>& sr3) { + return subrange(truncate_id(sr3.offset), 
truncate_range(sr3.range)); + } + + template + subrange truncate_chunk(const chunk<3>& ck3) { + return chunk(truncate_id(ck3.offset), truncate_range(ck3.range), truncate_range(ck3.global_size)); + } + + template + detail::box truncate_box(const detail::box<3>& b3) { + return detail::box(truncate_id(b3.get_min()), truncate_id(b3.get_max())); + } + } // namespace test_utils } // namespace celerity namespace Catch { -template -struct StringMaker> { - static std::string convert(const celerity::id& value) { - switch(Dims) { - case 1: return fmt::format("{{{}}}", value[0]); - case 2: return fmt::format("{{{}, {}}}", value[0], value[1]); - case 3: return fmt::format("{{{}, {}, {}}}", value[0], value[1], value[2]); - default: return {}; - } +template +struct StringMaker> { + static std::string convert(const std::pair& v) { + return fmt::format("({}, {})", Catch::Detail::stringify(v.first), Catch::Detail::stringify(v.second)); } }; -template -struct StringMaker> { - static std::string convert(const celerity::range& value) { - switch(Dims) { - case 1: return fmt::format("{{{}}}", value[0]); - case 2: return fmt::format("{{{}, {}}}", value[0], value[1]); - case 3: return fmt::format("{{{}, {}, {}}}", value[0], value[1], value[2]); - default: return {}; - } - } +template +struct StringMaker> { + static std::string convert(const std::optional& v) { return v.has_value() ? 
Catch::Detail::stringify(*v) : "null"; } }; +#define CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(Type) \ + template \ + struct StringMaker> { \ + static std::string convert(const Type& v) { return fmt::format("{}", v); } \ + }; + +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::id) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::range) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::subrange) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::chunk) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::detail::box) +CELERITY_TEST_UTILS_IMPLEMENT_CATCH_STRING_MAKER_FOR_DIMS(celerity::detail::region) + template <> struct StringMaker { static std::string convert(const sycl::device& d) { diff --git a/vendor/allscale/VERSION b/vendor/allscale/VERSION deleted file mode 100644 index 1374b2b8a..000000000 --- a/vendor/allscale/VERSION +++ /dev/null @@ -1,7 +0,0 @@ -AllScale API @ d058bb3f2c7782900fce9e5efdf093a16df56d6a -https://github.com/allscale/allscale_api - -Contains small changes in files - api/core/impl/reference/treeture.h - utils/functional_utils.h -to make it compile with MSVC2015. 
diff --git a/vendor/allscale/api/core/data.h b/vendor/allscale/api/core/data.h deleted file mode 100644 index c2636c877..000000000 --- a/vendor/allscale/api/core/data.h +++ /dev/null @@ -1,287 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/concepts.h" -#include "allscale/utils/serializer.h" - -namespace allscale { -namespace api { -namespace core { - - namespace sema { - - // c++ versions of the data item element access helper functions, facilitating compiler analysis - template - T& _data_item_element_access(DataItem&, const typename DataItem::region_type&, T& ref) { - return ref; - } - - template - const T& _data_item_element_access(const DataItem&, const typename DataItem::region_type&, const T& ref) { - return ref; - } - - /** - * A user-defined read requirement on a region of a data item. - */ - template - void needs_read_access(const DataItem& item, const typename DataItem::region_type& region) { - int a = 0; a = _data_item_element_access(item,region,a); - }; - - /** - * A user-defined write requirement on a region of a data item. - */ - template - void needs_write_access(const DataItem& item, const typename DataItem::region_type& region) { - int a = 0; _data_item_element_access(item,region,a) = 0; - }; - - /** - * Instruct compiler to ignore dependencies in the enclosing scope. - */ - inline void no_more_dependencies() {}; - - } - - // a macro to wrap up data_item_element_access calls, - // eliminating the overhead of creating a region instance on every access - // the ternary operation enforces type checks even on reference compilations - #ifndef ALLSCALECC - #define data_item_element_access(DataItem,Region,Res) \ - ((false) ? 
allscale::api::core::sema::_data_item_element_access(DataItem,Region,Res) : Res) - #else - #define data_item_element_access(DataItem,Region,Res) allscale::api::core::sema::_data_item_element_access(DataItem,Region,Res) - #endif - - // --------------------------------------------------------------------------------- - // Regions - // --------------------------------------------------------------------------------- - - - template - struct is_region : public std::false_type {}; - - template - struct is_region::value && - - // regions have to be serializable - utils::is_serializable::value && - - // there has to be an emptiness check - std::is_same::value && - - // there has to be an union operation - std::is_same::value && - - // there has to be an intersection operation - std::is_same::value && - - // there has to be a set difference operation - std::is_same::value && - - // there has to be a span operator, computing the hull of two regions - std::is_same::value, - - void>::type> : public std::true_type {}; - - - - - // --------------------------------------------------------------------------------- - // Fragments - // --------------------------------------------------------------------------------- - - - - template - struct is_fragment : public std::false_type {}; - - template - struct is_fragment::value && - - // fragments need to be constructible for a given region - std::is_same(), std::declval())), F>::value && - - // fragments need to be destructible - std::is_destructible::value && - - // the region covered by the fragment has to be obtainable - std::is_same::value && - - // there has to be a resize operator - std::is_same::value && - - // there is an insert operator importing data from an existing fragment - std::is_same::value && - - // there is a extract operator extracting a region of data from the present fragment - std::is_same::value && - - // there is a insert operator, importing previously extracted data into this fragment - std::is_same::value && - 
- // can be concerted into a facade - std::is_same::value, - - void>::type> : public std::true_type{}; - - - - - - // --------------------------------------------------------------------------------- - // SharedData - // --------------------------------------------------------------------------------- - - - template - struct is_shared_data : public std::false_type {}; - - template - struct is_shared_data::value && - - // regions have to be serializable - utils::is_serializable::value, - - void>::type> : public std::true_type {}; - - - // --------------------------------------------------------------------------------- - // Facade - // --------------------------------------------------------------------------------- - - - template - struct is_facade : public std::false_type {}; - - template - struct is_facade::value && - - // nor copy-assignable - !std::is_copy_assignable::value && - - // fragments need to be destructible - std::is_destructible::value, - - void>::type> : public std::true_type {}; - - - // --------------------------------------------------------------------------------- - // Data Items - // --------------------------------------------------------------------------------- - - - template - struct is_data_item : public std::false_type {}; - - template - struct is_data_item::value && - is_facade::value && - is_fragment::value && - is_shared_data::value, - void>::type> : public std::true_type {}; - - - template< - typename Fragment - > - struct data_item { - - // make sure the region type is satisfying the concept - static_assert(is_region::value, "Region type must fit region concept!"); - static_assert(is_fragment::value, "Fragment type must fit fragment concept!"); - static_assert(is_shared_data::value, "Shared data type must fit shared data concept!"); - - using fragment_type = Fragment; - using region_type = typename Fragment::region_type; - using facade_type = typename Fragment::facade_type; - using shared_data_type = typename 
Fragment::shared_data_type; - - // define default init/copy/move support - - data_item() = default; - data_item(data_item&&) = default; - data_item(const data_item&) = delete; - - data_item& operator=(const data_item&) = delete; - data_item& operator=(data_item&&) = default; - }; - - - // --------------------------------------------------------------------------------- - // Utilities - // --------------------------------------------------------------------------------- - - - /** - * A generic utility to compute whether a region a is covering a sub-set of a region b. - */ - template - typename std::enable_if::value,bool>::type - isSubRegion(const R& a, const R& b) { - return R::difference(a,b).empty(); - } - - /** - * A convenience wrapper for computing the span (e.g. convex hull) between two data regions. - */ - template - typename std::enable_if::value,R>::type - span(const R& a, const R& b) { - return R::span(a,b); - } - - /** - * A convince wrapper for merging a number of regions (single element base-case). - */ - template - typename std::enable_if::value,R>::type - merge(const R& a) { - return a; - } - - /** - * A convince wrapper for merging a number of regions (multiple element step-case). - */ - template - typename std::enable_if::value,R>::type - merge(const R& a, const Rs& ... rest) { - return R::merge(a,merge(rest...)); - } - - /** - * A default implementation of shared data for data items that do not need shared any shared data. 
- */ - struct no_shared_data { - - void store(utils::ArchiveWriter&) const { - // nothing to do - } - - static no_shared_data load(utils::ArchiveReader&) { - return no_shared_data(); - } - - }; - - // make sure the no_shared_data is a shared data instance - static_assert(is_shared_data::value, "no_shared_data type does not fulfill shared data concept!"); - -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/io.h b/vendor/allscale/api/core/impl/reference/io.h deleted file mode 100644 index d4e12e061..000000000 --- a/vendor/allscale/api/core/impl/reference/io.h +++ /dev/null @@ -1,1109 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#ifdef _MSC_VER - // includes - #include - // marcos for function identifiers - #define CLOSE_WRAPPER _close - #define LSEEK_WRAPPER _lseek - #define OPEN_WRAPPER _open - #define READ_WRAPPER _read - #define WRITE_WRAPPER _write - // macros for flags - #define S_IRUSR _S_IREAD - #define S_IWUSR _S_IWRITE -#else - // includes - #include - #include - // marcos for function identifiers - #define CLOSE_WRAPPER close - #define LSEEK_WRAPPER lseek - #define OPEN_WRAPPER open - #define READ_WRAPPER read - #define WRITE_WRAPPER write -#endif - -#include -#include -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/serializer.h" - - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * Supported IO modes. - */ - enum class Mode { - Text, Binary - }; - - /** - * The kind of handle to reference entities within an IO manager. - */ - struct Entry { - std::size_t id; - bool operator<(const Entry& other) const { return id < other.id; } - }; - - /** - * A common base class for Input and Output Streams. 
- */ - class IOStream { - protected: - - Entry entry; - - std::mutex operation_lock; - - IOStream(const Entry& entry) : entry(entry) {} - - IOStream(IOStream&& other) - : entry(other.entry) {} - - public: - - Entry getEntry() const { - return entry; - } - - }; - - /** - * A stream to load data in the form of a stream of entries. - */ - class InputStream : public IOStream { - - template - friend class IOManager; - - public: - struct IStreamWrapper { - std::istream& in; - IStreamWrapper(std::istream& in) : in(in) {} - template - IStreamWrapper& operator>>(T& value) { - in >> value; - return *this; - } - template - T read() { - T value; - in.read((char*)&value, sizeof(T)); - return value; - } - template - IStreamWrapper& read(T& res) { - in.read((char*)&res, sizeof(T)); - return *this; - } - }; - - private: - IStreamWrapper in; - - InputStream(const Entry& entry, std::istream& in) - : IOStream(entry), in(in) {} - - public: - - InputStream(InputStream&& other) - : IOStream(std::move(other)), in(other.in) {} - - template - void atomic(const Body& body) { - // protect output by locking it - std::lock_guard lease(operation_lock); - - // let the body read it's information - body(in); - - // free the lock - automatically - } - - template - void operator>>(T& value) { - atomic([&](IStreamWrapper& in) { in >> value; }); - } - - template - T read() { - T res; - atomic([&](IStreamWrapper& in) { - res = in.read(); - }); - return res; - } - - operator bool() const { - return (bool)in.in; - } - - static InputStream& load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - /** - * A stream to store data in the form of a stream of entries. 
- */ - class OutputStream : public IOStream { - - template - friend class IOManager; - - public: - struct OStreamWrapper { - std::ostream& out; - OStreamWrapper(std::ostream& out) : out(out) {} - template - OStreamWrapper& operator<<(const T& value) { - out << value; - return *this; - } - OStreamWrapper& operator<<(const char* value) { - out << value; - return *this; - } - template - OStreamWrapper& write(const T& value) { - out.write((char*)&value, sizeof(T)); - return *this; - } - }; - - private: - OStreamWrapper out; - - OutputStream(const Entry& entry, std::ostream& out) - : IOStream(entry), out(out) {} - - public: - - OutputStream(OutputStream&& other) - : IOStream(std::move(other)), out(other.out) {} - - template - void atomic(const Body& body) { - // protect output by locking it - std::lock_guard lease(operation_lock); - - // let the body write it's information - body(out); - - // free the lock - automatically - } - - template - void operator<<(const T& value) { - atomic([&](OStreamWrapper& out) { - out << value; - }); - } - void operator<<(const char* value) { - atomic([&](OStreamWrapper& out) { - out << value; - }); - } - - template - void write(const T& value) { - atomic([&](OStreamWrapper& out) { - out.write(value); - }); - } - - operator bool() const { - return (bool)out.out; - } - - static OutputStream& load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - - - - class MemoryMappedIO { - - Entry entry; - - void* base; - - public: - - MemoryMappedIO(const Entry& entry, void* base) - : entry(entry), base(base) {} - - Entry getEntry() const { - return entry; - } - - protected: - - void* getBase() const { - return base; - } - - }; - - class MemoryMappedInput : public MemoryMappedIO { - - template - friend class IOManager; - - MemoryMappedInput(const Entry& entry, void* base) - : MemoryMappedIO(entry,base) {} - - public: - - 
template - const T& access() const { - return *static_cast(getBase()); - } - - // -- make it serializable -- - - static MemoryMappedInput load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - class MemoryMappedOutput : public MemoryMappedIO { - - template - friend class IOManager; - - MemoryMappedOutput(const Entry& entry, void* base) - : MemoryMappedIO(entry,base) {} - - public: - - template - T& access() const { - return *static_cast(getBase()); - } - - // -- make it serializable -- - - static MemoryMappedOutput load(utils::ArchiveReader&) { - assert_not_implemented(); - exit(1); // prevent return warning - } - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - }; - - /** - * An IO manager, as the central dispatcher for IO operations. - */ - template - class IOManager { - - /** - * The underlying store. - */ - StorageManager store; - - /** - * The central register of all open output streams. - */ - std::map inputStreams; - - /** - * The central register of all open output streams. - */ - std::map outputStreams; - - /** - * The central register of all open memory mapped inputs. - */ - std::map memoryMappedInputs; - - /** - * The central register of all open memory mapped outputs. - */ - std::map memoryMappedOutputs; - - public: - - ~IOManager() { - // close and destroy all input streams - for(auto& cur : inputStreams) { - closeStream(cur.second); - } - // close and destroy all output streams - for(auto& cur : outputStreams) { - closeStream(cur.second); - } - // close and destroy all memory mapped inputs - for(auto& cur : memoryMappedInputs) { - closeMemoryMappedIO(cur.second); - } - // close and destroy all memory mapped outputs - for(auto& cur : memoryMappedOutputs) { - closeMemoryMappedIO(cur.second); - } - } - - /** - * Creates a new entry with the given name in the underlying storage system. 
- * - * @param name the name of the entry (e.g. file) - * @param mode whether it is a binary or text file - * @return a entry ID referencing the newly created resource - */ - Entry createEntry(const std::string& name, Mode mode = Mode::Text) { - return store.createEntry(name, mode); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - InputStream& openInputStream(Entry entry) { - - // check for present - auto pos = inputStreams.find(entry); - if (pos != inputStreams.end()) return pos->second; - - // create new input stream - InputStream res(entry, *store.createInputStream(entry)); - - // register stream - inputStreams.emplace(entry, std::move(res)); - - // return result - return getInputStream(entry); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - OutputStream& openOutputStream(Entry entry) { - - // check for present - auto pos = outputStreams.find(entry); - if (pos != outputStreams.end()) return pos->second; - - // create new input stream - OutputStream res(entry, *store.createOutputStream(entry)); - - // register stream - outputStreams.emplace(entry, std::move(res)); - - // return result - return getOutputStream(entry); - } - - /** - * Register a new memory mapped input with the given name within the system. - * The call will load the underlying storage and prepare input operations. - * - * NOTE: this method is not thread safe! 
- * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedInput openMemoryMappedInput(Entry entry) { - - // check for present - auto pos = memoryMappedInputs.find(entry); - if (pos != memoryMappedInputs.end()) return pos->second; - - // create new input stream - MemoryMappedInput res(entry, store.createMemoryMappedInput(entry)); - - // register stream - memoryMappedInputs.emplace(entry, std::move(res)); - - // return result - return getMemoryMappedInput(entry); - } - - /** - * Register a new memory mapped output with the given name within the system. - * The call will create the underlying storage and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedOutput openMemoryMappedOutput(Entry entry, std::size_t size) { - - // check for present - auto pos = memoryMappedOutputs.find(entry); - if (pos != memoryMappedOutputs.end()) return pos->second; - - // create new input stream - MemoryMappedOutput res(entry, store.createMemoryMappedOutput(entry,size)); - - // register stream - memoryMappedOutputs.emplace(entry, std::move(res)); - - // return result - return getMemoryMappedOutput(entry); - } - - - /** - * Obtains an input stream to read data from a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - InputStream& getInputStream(Entry entry) { - assert_true(inputStreams.find(entry) != inputStreams.end()); - return inputStreams.find(entry)->second; - } - - /** - * Obtains an output stream to write data to a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. 
- * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - OutputStream& getOutputStream(Entry entry) { - assert_true(outputStreams.find(entry) != outputStreams.end()); - return outputStreams.find(entry)->second; - } - - /** - * Obtains a memory mapped input to read data from a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * input is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped input - */ - MemoryMappedInput getMemoryMappedInput(Entry entry) { - assert_true(memoryMappedInputs.find(entry) != memoryMappedInputs.end()); - return memoryMappedInputs.find(entry)->second; - } - - /** - * Obtains a memory mapped output to write data to a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * output is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped output - */ - MemoryMappedOutput getMemoryMappedOutput(Entry entry) { - assert_true(memoryMappedOutputs.find(entry) != memoryMappedOutputs.end()); - return memoryMappedOutputs.find(entry)->second; - } - - /** - * Closes the stream with the given name. - */ - void closeInputStream(Entry entry) { - // get the stream - auto pos = inputStreams.find(entry); - if (pos == inputStreams.end()) return; - - // close the stream - closeStream(pos->second); - - // erase the entry - inputStreams.erase(pos); - } - - /** - * Closes the stream with the given name. - */ - void closeOutputStream(Entry entry) { - // get the stream - auto pos = outputStreams.find(entry); - if (pos == outputStreams.end()) return; - - // close the stream - closeStream(pos->second); - - // erase the entry - outputStreams.erase(pos); - } - - /** - * Closes the given stream. 
- */ - void close(const InputStream& in) { - closeInputStream(in.getEntry()); - } - - /** - * Closes the given stream. - */ - void close(const OutputStream& out) { - closeOutputStream(out.getEntry()); - } - - /** - * Closes the given memory mapped input. - */ - void close(const MemoryMappedInput& in) { - auto pos = memoryMappedInputs.find(in.getEntry()); - if (pos == memoryMappedInputs.end()) return; - - // remove memory mapping - closeMemoryMappedIO(in); - - // erase entry from register - memoryMappedInputs.erase(pos); - } - - /** - * Closes the given memory mapped output. - */ - void close(const MemoryMappedOutput& out) { - auto pos = memoryMappedOutputs.find(out.getEntry()); - if (pos == memoryMappedOutputs.end()) return; - - // remove memory mapping - closeMemoryMappedIO(out); - - // erase entry from register - memoryMappedOutputs.erase(pos); - } - - /** - * Determines whether the given entry exists. - */ - bool exists(Entry entry) const { - return store.exists(entry); - } - - /** - * Deletes the entry with the given name. - */ - void remove(Entry entry) { - store.remove(entry); - } - - private: - - /** - * Closes the given input stream. - */ - void closeStream(InputStream& in) { - // closes the stream - store.close(in.in.in); - } - - /** - * Closes the given output stream. - */ - void closeStream(OutputStream& out) { - // closes the stream - store.close(out.out.out); - } - - /** - * Close the given memory mapped IO connection. - */ - void closeMemoryMappedIO(const MemoryMappedInput& input) { - // closes the memory mapped input - store.close(input); - } - - /** - * Close the given memory mapped IO connection. 
- */ - void closeMemoryMappedIO(const MemoryMappedOutput& output) { - // closes the memory mapped output - store.close(output); - } - - }; - - - - // ---------------------------------------------------------------------- - // for in-memory buffer operations - // ---------------------------------------------------------------------- - - - struct BufferStorageFactory { - - struct Buffer { - std::string name; - Mode mode; - std::stringstream* stream; - }; - - struct MemoryMappedBuffer { - std::size_t size; - void* base; - }; - - std::size_t counter = 0; - - std::map buffers; - - std::map memoryMappedBuffers; - - ~BufferStorageFactory() { - for(const auto& cur : buffers) delete cur.second.stream; - for(const auto& cur : memoryMappedBuffers) free(cur.second.base); - } - - Entry createEntry(const std::string& name, Mode mode) { - // check for present entry - for(const auto& cur : buffers) { - if (cur.second.name == name) { - return cur.first; - } - } - - // create a new entry - Entry id{counter++}; - Buffer& entry = buffers[id]; - entry.name = name; - entry.mode = mode; - entry.stream = nullptr; - return id; - } - - std::istream* createInputStream(Entry entry) { - - // search for entry - auto pos = buffers.find(entry); - if (pos == buffers.end()) { - assert_fail() << "Unable to create input stream to unknown entity!"; - return nullptr; - } - - - // reuse current stream content - std::stringstream* old = pos->second.stream; - std::stringstream* res = (pos->second.mode == Mode::Binary) ? - new std::stringstream((old) ? old->str() : std::basic_string(), std::ios_base::in | std::ios_base::binary ) : - new std::stringstream((old) ? 
old->str() : std::basic_string(), std::ios_base::in ); - delete old; - pos->second.stream = res; - return res; - } - - std::ostream* createOutputStream(Entry entry) { - - // search for entry - auto pos = buffers.find(entry); - if (pos == buffers.end()) { - assert_fail() << "Unable to create output stream to unknown entity!"; - return nullptr; - } - - // reuse current stream content - std::stringstream* old = pos->second.stream; - std::stringstream* res = (pos->second.mode == Mode::Binary) ? - new std::stringstream((old) ? old->str() : std::basic_string(), std::ios_base::out | std::ios_base::binary ) : - new std::stringstream((old) ? old->str() : std::basic_string(), std::ios_base::out ); - delete old; - pos->second.stream = res; - return res; - } - - void* createMemoryMappedInput(const Entry& entry) { - // the target buffer needs to be present - auto pos = memoryMappedBuffers.find(entry); - if (pos == memoryMappedBuffers.end()) return nullptr; - return pos->second.base; - } - - void* createMemoryMappedOutput(const Entry& entry, std::size_t size) { - // check whether there is already such a buffer - auto pos = memoryMappedBuffers.find(entry); - if (pos != memoryMappedBuffers.end()) { - // use existing - assert_eq(size,pos->second.size) << "Cannot change size of buffer during re-opening!"; - return pos->second.base; - } - - // create a new buffer - auto& buffer = memoryMappedBuffers[entry]; - buffer.size = size; - buffer.base = std::malloc(size); - return buffer.base; - } - - void close(const MemoryMappedIO&) { - // nothing to do - } - - void close(std::istream&) { - // nothing to do - } - - void close(std::ostream&) { - // nothing to do - } - - bool exists(Entry entry) const { - return buffers.find(entry) != buffers.end(); - } - - void remove(Entry entry) { - auto pos = buffers.find(entry); - if (pos == buffers.end()) return; - delete pos->second.stream; - buffers.erase(pos); - } - }; - - class BufferIOManager : public IOManager { - - }; - - - // 
---------------------------------------------------------------------- - // for file IO - // ---------------------------------------------------------------------- - - struct FileStorageFactory { - - using file_descriptor = int; - - struct File { - // general - std::string name; - Mode mode; - - // for memory-mapped files - file_descriptor fd; - std::size_t size; - void* base; - - File(const std::string& name, Mode mode) - : name(name), mode(mode), fd(0), size(0), base(nullptr) {} - - }; - - std::vector files; - - Entry createEntry(const std::string& name, Mode mode) { - // check for present entry - for(std::size_t i=0; i < files.size(); ++i) { - if (files[i].name == name) return Entry{i}; - } - - // create a new entry - Entry id{files.size()}; - files.push_back(File(name,mode)); - return id; - } - - std::istream* createInputStream(Entry entry) { - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unable to create input stream to unknown entity!"; - return nullptr; - } - - // create a matching file stream - const File& file = files[entry.id]; - return (file.mode == Mode::Binary) ? - new std::fstream(file.name,std::ios_base::in | std::ios_base::binary) : - new std::fstream(file.name,std::ios_base::in); - } - - std::ostream* createOutputStream(Entry entry) { - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unable to create output stream to unknown entity!"; - return nullptr; - } - - // create a matching file stream - const File& file = files[entry.id]; - return (file.mode == Mode::Binary) ? 
- new std::fstream(file.name,std::ios_base::out | std::ios_base::binary) : - new std::fstream(file.name,std::ios_base::out); - } - - void* createMemoryMappedInput(const Entry& entry) { - - // get a reference to the covered file - File& file = getFile(entry); - - // check that file is not already mapped - assert_true(file.base==nullptr) - << "Error: file already previously opened!"; - - // get the file descriptor - file.fd = getFileDescriptor(file,true); - - // resolve the file size - file.size = getFileSize(file); - -#ifndef _MSC_VER - // map file into address space - file.base = mmap(nullptr,file.size, PROT_READ, MAP_PRIVATE, file.fd, 0); - // check result of mmap - if (!checkMappedAddress(file.base)) file.base = nullptr; -#else - // if no support for memory mapped io, try to read the entire file into a buffer - file.base = malloc(file.size); - auto bytesRead = READ_WRAPPER(file.fd, file.base, (unsigned)file.size); - if (bytesRead < 0) { - free(file.base); - file.base = nullptr; - } -#endif - - // return pointer to base address - return file.base; - } - - void* createMemoryMappedOutput(const Entry& entry, std::size_t size) { - - // get a reference to the covered file - File& file = getFile(entry); - - // check that file is not already mapped - assert_true(file.base==nullptr) - << "Error: file already previously opened!"; - - // get the file descriptor - file.fd = createFile(file,size); - - // fix the file size - file.size = size; - -#ifndef _MSC_VER - // map file into address space - file.base = mmap(nullptr,file.size, PROT_READ | PROT_WRITE, MAP_SHARED, file.fd, 0); - // check result of mmap - if (!checkMappedAddress(file.base)) file.base = nullptr; -#else - file.base = malloc(size); -#endif - - // return pointer to base address - return file.base; - } - - void close(std::istream& stream) { - delete &stream; - } - - void close(std::ostream& stream) { - delete &stream; - } - - void close(const MemoryMappedInput& mmi) { - close(mmi, false); - } - - void close(const 
MemoryMappedOutput& mmo) { - close(mmo, true); - } - - bool exists(Entry entry) const { - if (entry.id >= files.size()) return false; - struct stat buffer; - return stat(files[entry.id].name.c_str(), &buffer) == 0; - } - - void remove(Entry entry) { - if (entry.id >= files.size()) return; - std::remove(files[entry.id].name.c_str()); - } - - private: - - File& getFile(const Entry& entry) { - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unknown file entry: " << entry.id; - return files[0]; - } - - // provide access - return files[entry.id]; - } - - static file_descriptor createFile(const File& file, std::size_t size) { - - // create the new file - auto fd = OPEN_WRAPPER(file.name.c_str(), O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR ); - assert_ne(-1,fd) << "Error creating file " << file.name; - - // fix size of file - LSEEK_WRAPPER(fd,(long)(size-1),SEEK_SET); - - // write a byte at the end - char data = 0; - auto res = WRITE_WRAPPER(fd,&data,1); - assert_eq(1,res) << "Could not write byte at end of file."; - if (res != 1) return 0; - - // move cursor back to start - LSEEK_WRAPPER(fd,0,SEEK_SET); - - // return file descriptor - return fd; - } - - static file_descriptor getFileDescriptor(const File& file, bool readOnly) { - - // get the register entry - if (file.fd > 0) return file.fd; - - // get name of file - const char* name = file.name.c_str(); - - // get file descriptor from file name - auto fd = OPEN_WRAPPER(name, ((readOnly) ? 
O_RDONLY : O_RDWR ) ); - assert_ne(-1,fd) << "Error opening file " << name; - - // return the obtained file descriptor - return fd; - - } - - static std::size_t getFileSize(const File& file) { - - // get size of file - struct stat fileStat; - auto succ = stat(file.name.c_str(),&fileStat); - assert_eq(0,succ) << "Unable to obtain size of input file: " << file.name; - - if (succ != 0) return 0; - - // get the file size - return fileStat.st_size; - } - - static bool checkMappedAddress(void* addr) { -#ifndef _MSC_VER - // compare with error token - if (addr != MAP_FAILED) return true; - char buffer[2000]; - std::cout << strerror_r(errno,buffer,2000); -#endif - // fail with message if mapping failed - // or if mapped address checking was requested on MSVC platforms - assert_fail() << "Failed to map file into address space!"; - return false; - } - - void close(const MemoryMappedIO& mmio, bool requiresWrite) { - - auto entry = mmio.getEntry(); - - // check valid entry id - if (entry.id >= files.size()) { - assert_fail() << "Unable to close memory mapped input to unknown entity!"; - return; - } - - // get the register entry - File& file = files[entry.id]; - if (!file.base) return; - - int succ = 0; -#ifndef _MSC_VER - // unmap the file from the address space - succ = munmap(file.base, file.size); - assert_eq(0, succ) - << "Unable to unmap file " << file.name; - // if it was not successful, stop it here - if (succ != 0) return; - // silence unused parameter warning - (void)requiresWrite; -#else - // if no support for memory mapped io, just write full buffer contents to file and free buffer - if (requiresWrite) { - auto bytesWritten = WRITE_WRAPPER(file.fd, file.base, (unsigned)file.size); - free(file.base); - assert_le(0, bytesWritten) - << "Unable to write to file " << file.name << ", " << strerror(errno) << " " << file.fd; - } -#endif - - // close the file descriptor - succ = ::CLOSE_WRAPPER(file.fd); - assert_eq(0, succ) << "Unable to close file " << file.name; - - // 
reset the file descriptor - file.fd = 0; - - // reset the base pointer - file.base = nullptr; - - } - - }; - - class FileIOManager : public IOManager { - FileIOManager() {}; - public: - static FileIOManager& getInstance() { - static FileIOManager manager; - return manager; - } - }; - - - /** - * Obtains access to the singleton instance of the File IO manager. - */ - inline static FileIOManager& getFileIOManager() { - return FileIOManager::getInstance(); - } - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/lock.h b/vendor/allscale/api/core/impl/reference/lock.h deleted file mode 100644 index aba34c237..000000000 --- a/vendor/allscale/api/core/impl/reference/lock.h +++ /dev/null @@ -1,242 +0,0 @@ -#pragma once - -#include -#include - -#if defined _MSC_VER -// required for YieldProcessor macro -#define NOMINMAX -#include "windows.h" -//#elif defined (__ppc64__) || defined (_ARCH_PPC64) - -#endif - -namespace allscale { -namespace api { -namespace core { -inline namespace simple { - - /* Pause instruction to prevent excess processor bus usage */ - -#ifdef _MSC_VER -#define cpu_relax() YieldProcessor() -#elif defined (__ppc64__) || defined (_ARCH_PPC64) -#define __barrier() __asm__ volatile("": : :"memory") -#define __HMT_low() __asm__ volatile("or 1,1,1 # low priority") -#define __HMT_medium() __asm__ volatile("or 2,2,2 # medium priority") -#define cpu_relax() do { __HMT_low(); __HMT_medium(); __barrier(); } while (0) -#else -#define cpu_relax() __builtin_ia32_pause() -#endif - - class Waiter { - int i; - public: - Waiter() : i(0) {} - - void operator()() { - ++i; - if ((i % 1000) == 0) { - // there was no progress => let others work - std::this_thread::yield(); - } else { - // relax this CPU - cpu_relax(); - } - } - }; - - - - class SpinLock { - std::atomic lck; - public: - - SpinLock() : lck(0) { - } - - void lock() { - Waiter 
wait; - while(!try_lock()) wait(); - } - - bool try_lock() { - int should = 0; - return lck.compare_exchange_weak(should, 1, std::memory_order_acquire); - } - - void unlock() { - lck.store(0, std::memory_order_release); - } - }; - - /** - * An optimistic read/write lock. - */ - class OptimisticReadWriteLock { - - /** - * The type utilized for the version numbering. - */ - using version_t = std::size_t; - - /** - * The version number. - * - even: there is no write in progress - * - odd: there is a write in progress, do not allow read operations - */ - std::atomic version; - - public: - - /** - * The lease utilized to link start and end of read phases. - */ - class Lease { - friend class OptimisticReadWriteLock; - version_t version; - public: - Lease(version_t version = 0) : version(version) {} - Lease(const Lease& lease) = default; - Lease& operator=(const Lease& other) = default; - Lease& operator=(Lease&& other) = default; - }; - - OptimisticReadWriteLock() : version(0) {} - - /** - * Starts a read phase, making sure that there is currently no - * active concurrent modification going on. The resulting lease - * enables the invoking process to later-on verify that no - * concurrent modifications took place. - */ - Lease start_read() { - Waiter wait; - - // get a snapshot of the lease version - auto v = version.load(std::memory_order_acquire); - - // spin while there is a write in progress - while((v & 0x1) == 1) { - // wait for a moment - wait(); - // get an updated version - v = version.load(std::memory_order_acquire); - } - - // done - return Lease(v); - } - - /** - * Tests whether there have been concurrent modifications since - * the given lease has been issued. 
- * - * @return true if no updates have been conducted, false otherwise - */ - bool validate(const Lease& lease) { - // check whether version number has changed in the mean-while - return lease.version == version.load(std::memory_order_consume); - } - - /** - * Ends a read phase by validating the given lease. - * - * @return true if no updates have been conducted since the - * issuing of the lease, false otherwise - */ - bool end_read(const Lease& lease) { - // check lease in the end - return validate(lease); - } - - /** - * Starts a write phase on this lock be ensuring exclusive access - * and invalidating any existing read lease. - */ - void start_write() { - Waiter wait; - - // set last bit => make it odd - auto v = version.fetch_or(0x1, std::memory_order_acquire); - - // check for concurrent writes - while((v & 0x1) == 1) { - // wait for a moment - wait(); - // get an updated version - v = version.fetch_or(0x1, std::memory_order_acquire); - } - - // done - } - - /** - * Tries to start a write phase unless there is a currently ongoing - * write operation. In this case no write permission will be obtained. - * - * @return true if write permission has been granted, false otherwise. - */ - bool try_start_write() { - auto v = version.fetch_or(0x1, std::memory_order_acquire); - return !(v & 0x1); - } - - /** - * Updates a read-lease to a write permission by a) validating that the - * given lease is still valid and b) making sure that there is no currently - * ongoing write operation. - * - * @return true if the lease was still valid and write permissions could - * be granted, false otherwise. 
- */ - bool try_upgrade_to_write(const Lease& lease) { - auto v = version.fetch_or(0x1, std::memory_order_acquire); - - // check whether write privileges have been gained - if (v & 0x1) return false;// there is another writer already - - // check whether there was no write since the gain of the read lock - if (lease.version == v) return true; - - // if there was, undo write update - abort_write(); - - // operation failed - return false; - } - - /** - * Aborts a write operation by reverting to the version number before - * starting the ongoing write, thereby re-validating existing leases. - */ - void abort_write() { - // reset version number - version.fetch_sub(1,std::memory_order_release); - } - - /** - * Ends a write operation by giving up the associated exclusive access - * to the protected data and abandoning the provided write permission. - */ - void end_write() { - // update version number another time - version.fetch_add(1,std::memory_order_release); - } - - /** - * Tests whether currently write permissions have been granted to any - * client by this lock. - * - * @return true if so, false otherwise - */ - bool is_write_locked() const { - return version & 0x1; - } - - }; - -} // end namespace simple -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/profiling.h b/vendor/allscale/api/core/impl/reference/profiling.h deleted file mode 100644 index cffd9f535..000000000 --- a/vendor/allscale/api/core/impl/reference/profiling.h +++ /dev/null @@ -1,411 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "allscale/api/core/impl/reference/task_id.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * A log entry within the performance log. - */ - class ProfileLogEntry { - - public: - - /** - * Codes enumerating possible events. 
- */ - enum Kind { - // worker events - WorkerCreated, // < the first event to be logged - WorkerSuspended, // < a worker thread is suspended - WorkerResumed, // < a worker thread is resumed - WorkerDestroyed, // < a worker thread is terminated - - // task events - TaskStolen, // < a task got stolen - TaskSplit, // < a task got split - TaskStarted, // < a task processing got started - TaskEnded, // < a task processing finished - - // control events - EndOfStream, // < the last event, to mark the end of a stream - }; - - private: - - uint64_t time; - - Kind kind; - - TaskID task; - - ProfileLogEntry(uint64_t time, Kind kind) - : time(time), kind(kind), task() {} - - ProfileLogEntry(uint64_t time, Kind kind, TaskID task) - : time(time), kind(kind), task(task) {} - - public: - - ProfileLogEntry() =default; - - // -- observers -- - - uint64_t getTimestamp() const { - return time; - } - - Kind getKind() const { - return kind; - } - - TaskID getTask() const { - return task; - } - - // -- factories -- - - static ProfileLogEntry createWorkerCreatedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerCreated); - } - - static ProfileLogEntry createWorkerDestroyedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerDestroyed); - } - - static ProfileLogEntry createWorkerSuspendedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerSuspended); - } - - static ProfileLogEntry createWorkerResumedEntry() { - return ProfileLogEntry(getCurrentTime(), WorkerResumed); - } - - static ProfileLogEntry createTaskStolenEntry(const TaskID& task) { - return ProfileLogEntry(getCurrentTime(), TaskStolen, task); - } - - static ProfileLogEntry createTaskStartedEntry(const TaskID& task) { - return ProfileLogEntry(getCurrentTime(), TaskStarted, task); - } - - static ProfileLogEntry createTaskEndedEntry(const TaskID& task) { - return ProfileLogEntry(getCurrentTime(), TaskEnded, task); - } - - // -- utility functions -- - - bool operator<(const ProfileLogEntry& other) { - // sort 
events by time - return time < other.time; - } - - friend std::ostream& operator<<(std::ostream& out, const ProfileLogEntry& entry) { - - out << "@" << entry.time << ":"; - - switch(entry.kind) { - // worker events - case WorkerCreated: return out << "Worker created"; - case WorkerSuspended: return out << "Worker suspended"; - case WorkerResumed: return out << "Worker resumed"; - case WorkerDestroyed: return out << "Worker destroyed"; - - // task events - case TaskStolen: return out << "Task " << entry.task << " stolen"; - case TaskSplit: return out << "Task " << entry.task << " split"; - case TaskStarted: return out << "Task " << entry.task << " started"; - case TaskEnded: return out << "Task " << entry.task << " ended"; - - // everything else - default: return out << "Unknown event!"; - } - } - - private: - - /** - * A utility to retrieve a timestamp for events. - */ - static uint64_t getCurrentTime() { - static thread_local uint64_t last = 0; - - // get current time - uint64_t cur = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch() - ).count(); - - // make sure time is progressing - if (cur > last) { - last = cur; - return cur; - } - - // increase by at least one time step - return last+1; - } - - }; - - - - class ProfileLog { - - public: - - // the block size of the log - enum { BATCH_SIZE = 100000 }; - - private: - - using block_t = std::array; - using block_list_t = std::list; - - using block_const_iter = block_list_t::const_iterator; - using block_iter = block_list_t::iterator; - - using entry_const_iter = block_t::const_iterator; - using entry_iter = block_t::iterator; - - // the log entries, organized in blocks of N entries - block_list_t data; - - entry_iter next; - entry_iter endOfBlock; - - public: - - ProfileLog() : next(nullptr), endOfBlock(nullptr) {} - - void addEntry(const ProfileLogEntry& entry) { - // create a new block if necessary - if (next == endOfBlock) { - data.emplace_back(); - next = 
data.back().begin(); - endOfBlock = data.back().end(); - } - - // insert entry - *next = entry; - ++next; - } - - ProfileLog& operator<<(const ProfileLogEntry& entry) { - addEntry(entry); - return *this; - } - - - // -- log entry iteration -- - - class iterator : public std::iterator { - - block_const_iter b_cur; - block_const_iter b_end; - - entry_const_iter e_cur; - entry_const_iter e_end; - - entry_const_iter log_end; - - public: - - static iterator begin(const block_list_t& blocks, const entry_const_iter& log_end) { - iterator res; - res.b_cur = blocks.begin(); - res.b_end = blocks.end(); - if (res.isEnd()) return res; - res.e_cur = res.b_cur->begin(); - res.e_end = res.b_cur->end(); - res.log_end = log_end; - return res; - } - - static iterator end(const block_list_t& blocks) { - iterator res; - res.b_cur = blocks.end(); - res.b_end = blocks.end(); - return res; - } - - bool operator==(const iterator& other) const { - return isEnd() && other.isEnd(); - } - - bool operator!=(const iterator& other) const { - return !(*this == other); - } - - const ProfileLogEntry& operator*() const { - return *e_cur; - } - - iterator& operator++() { - // go to next entry - ++e_cur; - - // if it is the end of the log => jump to end of iterator range - if (e_cur == log_end) { - b_cur = b_end; - return *this; - } - - // if not end of current block is reached, continue - if (e_cur != e_end) return *this; - - // go to next block - b_cur++; - - // if there is none, mark as done - if (b_cur == b_end) return *this; - - // walk into next block - e_cur = b_cur->begin(); - e_end = b_cur->end(); - return *this; - } - - private: - - bool isEnd() const { - return b_cur == b_end; - } - - }; - - iterator begin() const { - return iterator::begin(data,next); - } - - iterator end() const { - return iterator::end(data); - } - - - void saveTo(std::ostream& out) { - // save the number of blocks - std::size_t num_blocks = data.size(); - out.write((char*)&num_blocks,sizeof(num_blocks)); - - // save the 
offset of the last block - std::size_t offset = 0; - if (num_blocks > 0) { - offset = next - data.back().begin(); - } - out.write((char*)&offset,sizeof(offset)); - - // save all blocks - for(const auto& cur : data) { - out.write((char*)&cur,sizeof(block_t)); - } - } - - void saveTo(const std::string& file) { - std::fstream trg(file.c_str(), std::ios::out | std::ios::binary); - saveTo(trg); - } - - static ProfileLog loadFrom(std::istream& in) { - // load the number of blocks - std::size_t num_blocks; - in.read((char*)&num_blocks,sizeof(num_blocks)); - - // load the offset for the last block - std::size_t offset; - in.read((char*)&offset,sizeof(offset)); - - ProfileLog log; - for(std::size_t i = 0; i 0) { - log.next = log.data.back().begin() + offset; - } - - // done - return log; - } - - static ProfileLog loadFrom(const std::string& file) { - std::fstream src(file.c_str(), std::ios::in | std::ios::binary); - return loadFrom(src); - } - - }; - - inline std::string getLogFileNameForWorker(int id) { - // create the filename - char filename[17]; - assert_lt(id, 10000) << "Unexpectedly larger number of workers"; - snprintf(filename, 17, "profile_log.%04d", ((unsigned)id)%10000); - return filename; - } - - static inline int& getCurrentWorkerID() { - static thread_local int workerID; - return workerID; - } - - static inline void setCurrentWorkerID(int id) { - getCurrentWorkerID() = id; - } - - namespace detail { - - struct ProfileLogHandler { - ProfileLog log; - - ~ProfileLogHandler() { - // save log to the chosen filename - log.saveTo(getLogFileNameForWorker(getCurrentWorkerID())); - } - }; - - inline ProfileLog& getProfileLog() { - static thread_local ProfileLogHandler logHandler; - return logHandler.log; - } - - inline void logProfilerEventInternal(const ProfileLogEntry& entry) { - getProfileLog() << entry; - } - - } - - - #ifdef ENABLE_PROFILING - - const bool PROFILING_ENABLED = true; - - #define logProfilerEvent(EVENT) \ - 
allscale::api::core::impl::reference::detail::logProfilerEventInternal(EVENT) - - #else - - const bool PROFILING_ENABLED = false; - - #define logProfilerEvent(EVENT) /* ignore */ - - #endif - - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/queue.h b/vendor/allscale/api/core/impl/reference/queue.h deleted file mode 100644 index c20bf72a0..000000000 --- a/vendor/allscale/api/core/impl/reference/queue.h +++ /dev/null @@ -1,380 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include "allscale/utils/printer/arrays.h" -#include "allscale/api/core/impl/reference/lock.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - - template - class BoundQueue { - - public: - - static const size_t capacity = Capacity; - - private: - - using guard = std::lock_guard; - - static const size_t buffer_size = capacity + 1; - - mutable SpinLock lock; - - std::array data; - - size_t front; - size_t back; - - public: - - BoundQueue() : lock(), front(0), back(0) { - for(auto& cur : data) cur = T(); - } - - bool empty() const { - return front == back; - } - bool full() const { - return ((back + 1) % buffer_size) == front; - } - - bool push_front(const T& t) { - guard g(lock); - if (full()) { - return false; - } - front = (front - 1 + buffer_size) % buffer_size; - data[front] = t; - return true; - } - - bool push_back(const T& t) { - guard g(lock); - if (full()) { - return false; - } - data[back] = t; - back = (back + 1) % buffer_size; - return true; - } - - private: - - T pop_front_internal() { - if (empty()) { - return T(); - } - T res(std::move(data[front])); - front = (front + 1) % buffer_size; - return res; - } - - T pop_back_internal() { - if (empty()) { - return T(); - } - back = (back - 1 + buffer_size) % buffer_size; - T res(std::move(data[back])); - return res; - } 
- - public: - - T pop_front() { - guard g(lock); - return pop_front_internal(); - } - - T try_pop_front() { - if (!lock.try_lock()) { - return {}; - } - const T& res = pop_front_internal(); - lock.unlock(); - return res; - } - - T pop_back() { - guard g(lock); - return pop_back_internal(); - } - - T try_pop_back() { - if (!lock.try_lock()) { - return {}; - } - const T& res = pop_back_internal(); - lock.unlock(); - return res; - } - - size_t size() const { - guard g(lock); - return (back >= front) ? (back - front) : (buffer_size - (front - back)); - } - - std::vector getSnapshot() const { - std::vector res; - guard g(lock); - size_t i = front; - while(i != back) { - res.push_back(data[i]); - i += (i + 1) % buffer_size; - } - return res; - } - - friend std::ostream& operator<<(std::ostream& out, const BoundQueue& queue) { - guard g(queue.lock); - return out << "[" << queue.data << "," << queue.front << " - " << queue.back << "]"; - } - - }; - - - - template - class UnboundQueue { - - using guard = std::lock_guard; - - mutable SpinLock lock; - - std::list data; - - std::atomic num_entries; - - public: - - UnboundQueue() : lock(), num_entries(0) {} - - void push_front(const T& t) { - guard g(lock); - data.push_front(t); - ++num_entries; - } - - void push_back(const T& t) { - guard g(lock); - data.push_back(t); - ++num_entries; - } - - private: - - T pop_front_internal() { - if (data.empty()) { - return T(); - } - T res(std::move(data.front())); - data.pop_front(); - --num_entries; - return res; - } - - T pop_back_internal() { - if (data.empty()) { - return T(); - } - T res(std::move(data.back())); - data.pop_back(); - --num_entries; - return res; - } - - public: - - T pop_front() { - guard g(lock); - return pop_front_internal(); - } - - T try_pop_front() { - if (!lock.try_lock()) { - return {}; - } - const T& res = pop_front_internal(); - lock.unlock(); - return res; - } - - T pop_back() { - guard g(lock); - return pop_back_internal(); - } - - T try_pop_back() { - if 
(!lock.try_lock()) { - return {}; - } - const T& res = pop_back_internal(); - lock.unlock(); - return res; - } - - bool empty() const { - return num_entries == 0; - } - - size_t size() const { - return num_entries; - } - - std::vector getSnapshot() const { - guard g(lock); - return std::vector(data.begin(),data.end()); - } - - }; - - - template - class OptimisticUnboundQueue { - - mutable OptimisticReadWriteLock lock; - - std::list data; - - std::atomic num_entries; - - public: - - OptimisticUnboundQueue() : lock(), num_entries(0) {} - - void push_front(const T& t) { - lock.start_write(); - data.push_front(t); - ++num_entries; - lock.end_write(); - } - - void push_back(const T& t) { - lock.start_write(); - data.push_back(t); - ++num_entries; - lock.end_write(); - } - - private: - - template - T pop_front_internal() { - // manual tail-recursion optimization since - // debug builds may fail to do so - while(true) { - - // start with a read permit - auto lease = lock.start_read(); - - // check whether it is empty - if (data.empty()) { - return T(); - } - - // to retrieve data, upgrade to a write - if (!lock.try_upgrade_to_write(lease)) { - // if upgrade failed, restart procedure if requested - if (tryOnlyOnce) return T(); - continue; // start over again - } - - // now this one has write access (exclusive) - T res(std::move(data.front())); - data.pop_front(); - --num_entries; - - // write is complete - lock.end_write(); - - // done - return res; - - } - } - - template - T pop_back_internal() { - // manual tail-recursion optimization since - // debug builds may fail to do so - while(true) { - - // start with a read permit - auto lease = lock.start_read(); - - // check whether it is empty - if (data.empty()) { - return T(); - } - - // to retrieve data, upgrade to a write - if (!lock.try_upgrade_to_write(lease)) { - // if upgrade failed, restart procedure if requested - if (tryOnlyOnce) return T(); - continue; // start over again - } - - // now this one has write access 
(exclusive) - T res(std::move(data.back())); - data.pop_back(); - --num_entries; - - // write is complete - lock.end_write(); - - // done - return res; - } - } - - public: - - T pop_front() { - return pop_front_internal(); - } - - T try_pop_front() { - return pop_front_internal(); - } - - T pop_back() { - return pop_back_internal(); - } - - T try_pop_back() { - return pop_back_internal(); - } - - bool empty() const { - return num_entries == 0; - } - - size_t size() const { - return num_entries; - } - - std::vector getSnapshot() const { - lock.start_write(); - std::vector res(data.begin(),data.end()); - lock.end_write(); - return res; - } - - }; - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/runtime_predictor.h b/vendor/allscale/api/core/impl/reference/runtime_predictor.h deleted file mode 100644 index a4e785066..000000000 --- a/vendor/allscale/api/core/impl/reference/runtime_predictor.h +++ /dev/null @@ -1,231 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#if defined _MSC_VER -#include -#elif defined (__ppc64__) || defined (_ARCH_PPC64) || defined(__powerpc__) || defined(__ppc__) -static __inline__ unsigned long long __rdtsc(void) -{ - int64_t tb; - asm("mfspr %0, 268" : "=r"(tb)); - return tb; -} -#else -#include -#endif - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * A type to represent a type safe cycle count. 
- */ - class CycleCount { - - using time_t = unsigned long long; - - time_t value; - - public: - - CycleCount() {} - - CycleCount(time_t value) : value(value) {} - - bool operator==(const CycleCount& other) const { - return value == other.value; - } - - bool operator!=(const CycleCount& other) const { - return value != other.value; - } - - bool operator<(const CycleCount& other) const { - return value < other.value; - } - - bool operator>(const CycleCount& other) const { - return value > other.value; - } - - CycleCount operator+(const CycleCount& other) const { - return value + other.value; - } - - CycleCount operator-(const CycleCount& other) const { - return value - other.value; - } - - time_t count() const { - return value; - } - - static CycleCount zero() { - return 0; - } - - static CycleCount max() { - return std::numeric_limits::max(); - } - - }; - - inline CycleCount operator*(long unsigned int f, const CycleCount& count) { - return f * count.count(); - } - - inline CycleCount operator*(const CycleCount& count, long unsigned int f) { - return count.count() * f; - } - - inline CycleCount operator/(const CycleCount& count, long unsigned int div) { - return count.count() / div; - } - - /** - * A cycle clock for the time prediction. - */ - struct CycleClock { - - using time_point = CycleCount; - using duration = CycleCount; - - static time_point now() { - return __rdtsc(); - } - - }; - - - /** - * A utility to estimate the execution time of tasks on different - * levels of task-decomposition steps. - */ - class RuntimePredictor { - - public: - - using clock = CycleClock; - - using duration = clock::duration; - - enum { MAX_LEVELS = 100 }; - - private: - - /** - * The number of samples recorded per task level. - */ - std::array samples; - - /** - * The current estimates of execution times of tasks. 
- */ - std::array times; - - public: - - RuntimePredictor(unsigned numWorkers = std::thread::hardware_concurrency()) { - // reset number of collected samples - samples.fill(0); - - // initialize time estimates - times.fill(duration::zero()); - - // initialize execution times up to a given level - for(int i=0; i= MAX_LEVELS) return duration::zero(); - return times[level]; - } - - /** - * Update the predictions for a level. - */ - void registerTime(std::size_t level, const duration& time) { - - // update matching level - updateTime(level,time); - - // update higher levels (with reduced weight) - auto smallerTime = time / 2; - auto largerTime = time * 2; - for(std::size_t d = 1; d < 5; d++) { - - // update higher element - if (d <= level) { - updateTime(level-d,largerTime); - } - - // update smaller element - if (level+d < MAX_LEVELS) { - updateTime(level+d,smallerTime); - } - - // update parameters - smallerTime = smallerTime / 2; - largerTime = largerTime * 2; - } - - } - - /** - * Enable the printing of the predictor state. - */ - friend std::ostream& operator<<(std::ostream& out, const RuntimePredictor& pred) { - out << "Predictions:\n"; - for(int i = 0; i - inline RuntimePredictor& getRuntimePredictor() { - static thread_local RuntimePredictor predictor = RuntimePredictor(); - return predictor; - } - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/task_id.h b/vendor/allscale/api/core/impl/reference/task_id.h deleted file mode 100644 index 42a86b3e5..000000000 --- a/vendor/allscale/api/core/impl/reference/task_id.h +++ /dev/null @@ -1,257 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "allscale/utils/assert.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - /** - * The path part of a task ID. 
The path is the part of and ID addressing - * a certain sub-task of a decomposable task. - */ - class TaskPath { - - using path_t = std::uint64_t; - using length_t = std::uint8_t; - - path_t path; - length_t length; - - TaskPath(path_t path, length_t length) : path(path), length(length) {} - - public: - - TaskPath() = default; - - static TaskPath root() { - return TaskPath{0,0}; - } - - bool isRoot() const { - return length == 0; - } - - path_t getPath() const { - return path; - } - - length_t getLength() const { - return length; - } - - bool operator==(const TaskPath& other) const { - return path == other.path && length == other.length; - } - - bool operator!=(const TaskPath& other) const { - return !(*this == other); - } - - bool operator<(const TaskPath& other) const { - // get common prefix length - auto min_len = std::min(length, other.length); - - auto pA = path >> (length - min_len); - auto pB = other.path >> (other.length - min_len); - - // lexicographical compare - if (pA == pB) { - return length < other.length; - } - - // compare prefix comparison - return pA < pB; - } - - bool isPrefixOf(const TaskPath& other) const { - return length < other.length && (path == other.path >> (other.length - length)); - } - - TaskPath getLeftChildPath() const { - assert_lt((std::size_t)length,sizeof(path)*8); - auto res = *this; - res.path = res.path << 1; - ++res.length; - return res; - } - - TaskPath getRightChildPath() const { - auto res = getLeftChildPath(); - res.path = res.path + 1; - return res; - } - - TaskPath& descentLeft() { - path = path << 1; - return *this; - } - - TaskPath& descentRight() { - descentLeft(); - path += 1; - return *this; - } - - // --- path iterator support --- - - enum Direction { - Left = 0, Right = 1 - }; - - - class path_iterator : public std::iterator { - - path_t path; - length_t pos; - Direction cur; - - path_iterator(path_t path, length_t pos, Direction cur) - : path(path), pos(pos), cur(cur) {} - - public: - - static path_iterator 
begin(path_t path, length_t length) { - if (length == 0) return end(path); - return path_iterator( path, length, Direction((path >> (length-1)) % 2) ); - } - - static path_iterator end(path_t path) { - return path_iterator( path, 0, Left ); - } - - bool operator==(const path_iterator& other) const { - return pos == other.pos && path == other.path; - } - - bool operator!=(const path_iterator& other) const { - return !(*this == other); - } - - const Direction& operator*() const { - return cur; - } - - path_iterator& operator++() { - --pos; - if (pos==0) return *this; // we have reached the end - cur = Direction((path >> (pos-1)) % 2); - return *this; - } - - }; - - path_iterator begin() const { - return path_iterator::begin(path,length); - } - - path_iterator end() const { - return path_iterator::end(path); - } - - - // --- print support --- - - friend std::ostream& operator<<(std::ostream& out, const TaskPath& path) { - for(const auto& cur : path) { - out << "." << cur; - } - return out; - } - - }; - - /** - * An identifier of work items. Each work item is either a root-work-item, - * created by an initial prec call, or a child work item created through the - * splitting of a parent work item. The identifier is tracing this parent-child - * relationship. - * - * E.g. 
parent work item ID: - * - * T-12.0.1.0.1 - * - * child work items: - * - * T-12.0.1.0.1.0 and WI-12.0.1.0.1.1 - * - */ - class TaskID { - - std::uint64_t id; - TaskPath path; - - public: - - TaskID() = default; - - TaskID(std::uint64_t id) : id(id), path(TaskPath::root()) {} - - TaskID(std::uint64_t id, const TaskPath& path) - : id(id), path(path) {} - - - // -- observers -- - - std::uint64_t getRootID() const { - return id; - } - - const TaskPath& getPath() const { - return path; - } - - auto getDepth() const { - return path.getLength(); - } - - // -- utility functions -- - - bool operator==(const TaskID& other) const { - return id == other.id && path == other.path; - } - - bool operator!=(const TaskID& other) const { - return !(*this == other); - } - - bool operator<(const TaskID& other) const { - // check id - if (id < other.id) return true; - if (id > other.id) return false; - - // compare the paths - return path < other.path; - } - - bool isParentOf(const TaskID& child) const { - return id == child.id && path.isPrefixOf(child.path); - } - - TaskID getLeftChild() const { - return TaskID{ id, path.getLeftChildPath() }; - } - - TaskID getRightChild() const { - return TaskID{ id, path.getRightChildPath() }; - } - - - friend std::ostream& operator<<(std::ostream& out, const TaskID& id) { - return out << "T-" << id.id << id.path; - } - - }; - - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/impl/reference/treeture.h b/vendor/allscale/api/core/impl/reference/treeture.h deleted file mode 100644 index a3b5d16bf..000000000 --- a/vendor/allscale/api/core/impl/reference/treeture.h +++ /dev/null @@ -1,3031 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __linux__ - #include -#endif - -#include "allscale/utils/assert.h" -#include 
"allscale/utils/bitmanipulation.h" - -#include "allscale/api/core/impl/reference/lock.h" -#include "allscale/api/core/impl/reference/profiling.h" -#include "allscale/api/core/impl/reference/queue.h" -#include "allscale/api/core/impl/reference/runtime_predictor.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace reference { - - // ------------------------------------- Declarations ----------------------------------------- - - /** - * The actual treeture, referencing the computation of a value. - */ - template - class treeture; - - /** - * A treeture not yet released to the runtime system for execution. - */ - template - class unreleased_treeture; - - /** - * A handle for a lazily constructed unreleased treeture. This intermediate construct is utilized - * for writing templated code that can be optimized to overhead-less computed values and to facilitate - * the support of the sequence combinator. - */ - template - class lazy_unreleased_treeture; - - /** - * A reference to a task to synchronize upon it. 
- */ - class task_reference; - - /** - * A class to model task dependencies - */ - template - class dependencies; - - - - // --------------------------------------------------------------------------------------------- - // Internal Forward Declarations - // --------------------------------------------------------------------------------------------- - - - class TaskBase; - - template - class Task; - - - // --------------------------------------------------------------------------------------------- - // Debugging - // --------------------------------------------------------------------------------------------- - - - // -- Declarations -- - - const bool REFERENCE_RUNTIME_DEBUG = false; - - inline std::mutex& getLogMutex() { - static std::mutex m; - return m; - } - - #define LOG(MSG) \ - { \ - if (REFERENCE_RUNTIME_DEBUG) { \ - std::thread::id this_id = std::this_thread::get_id(); \ - std::lock_guard lock(getLogMutex()); \ - std::cerr << "Thread " << this_id << ": " << MSG << "\n"; \ - } \ - } - - const bool DEBUG_SCHEDULE = false; - - #define LOG_SCHEDULE(MSG) \ - { \ - if (DEBUG_SCHEDULE) { \ - std::thread::id this_id = std::this_thread::get_id(); \ - std::lock_guard lock(getLogMutex()); \ - std::cerr << "Thread " << this_id << ": " << MSG << "\n"; \ - } \ - } - - const bool DEBUG_TASKS = false; - - #define LOG_TASKS(MSG) \ - { \ - if (DEBUG_TASKS) { \ - std::thread::id this_id = std::this_thread::get_id(); \ - std::lock_guard lock(getLogMutex()); \ - std::cerr << "Thread " << this_id << ": " << MSG << "\n"; \ - } \ - } - - - - // ----------------------------------------------------------------- - // Monitoring (for Debugging) - // ----------------------------------------------------------------- - - - const bool MONITORING_ENABLED = false; - - namespace monitoring { - - enum class EventType { - Run, RunDirect, Split, Wait, DependencyWait - }; - - struct Event { - - EventType type; - - const TaskBase* task; - - TaskID taskId; - - bool operator==(const Event& 
other) const { - return other.type == type && other.task == task && other.taskId == taskId; - } - - friend std::ostream& operator<<(std::ostream& out, const Event& e); - }; - - - class ThreadState { - - using guard = std::lock_guard; - - std::thread::id thread_id; - - std::mutex lock; - - std::vector eventStack; - - public: - - ThreadState() : thread_id(std::this_thread::get_id()) { - guard g(getStateLock()); - getStates().push_back(this); - } - - ~ThreadState() { - assert_true(eventStack.empty()); - } - - void pushEvent(const Event& e) { - guard g(lock); - eventStack.push_back(e); - } - - void popEvent(__allscale_unused const Event& e) { - guard g(lock); - assert_eq(e,eventStack.back()); - eventStack.pop_back(); - } - - void dumpState(std::ostream& out) { - guard g(lock); - out << "\nThread: " << thread_id << "\n"; - out << "\tStack:\n"; - for(const auto& cur : eventStack) { - out << "\t\t" << cur << "\n"; - } - out << "\t\t -- top of stack --\n"; - out << "\n"; - } - - static void dumpStates(std::ostream& out) { - // lock states - std::lock_guard g(getStateLock()); - - // provide a hint if there is no information - if (getStates().empty()) { - out << "No thread states recorded."; - if (!MONITORING_ENABLED) { - out << " You can enable it by setting the MONITORING_ENABLED flag in the code base."; - } - out << "\n"; - return; - } - - // print all current states - for(const auto& cur : getStates()) { - cur->dumpState(out); - } - } - - private: - - static std::mutex& getStateLock() { - static std::mutex state_lock; - return state_lock; - } - - static std::vector& getStates() { - static std::vector states; - return states; - } - - }; - - thread_local static ThreadState tl_thread_state; - - - struct Action { - - bool active; - Event e; - - Action() : active(false) {} - - Action(const Event& e) : active(true), e(e) { - // register action - tl_thread_state.pushEvent(e); - } - - Action(Action&& other) : active(other.active), e(other.e) { - other.active = false; - } - - 
Action(const Action&) = delete; - - Action& operator=(const Action&) = delete; - Action& operator=(Action&&) = delete; - - ~Action() { - if (!active) return; - // remove action from action stack - tl_thread_state.popEvent(e); - } - - }; - - inline Action log(EventType type, const TaskBase* task) { - assert_true(type != EventType::DependencyWait); - if (!MONITORING_ENABLED) return {}; - return Event{type,task,TaskID()}; - } - - inline Action log(EventType type, const TaskID& task) { - assert_true(type == EventType::DependencyWait); - if (!MONITORING_ENABLED) return {}; - return Event{type,nullptr,task}; - } - - } - - - - - // --------------------------------------------------------------------------------------------- - // Task Dependency Manager - // --------------------------------------------------------------------------------------------- - - template - class TaskDependencyManager { - - // dependencies are stored in a linked list - struct Entry { - TaskBase* task; - Entry* next; - }; - - using cell_type = std::atomic; - - enum { num_entries = 1<<(max_depth+1) }; - - // an epoch counter to facilitate re-use - std::atomic epoch; - - // the container for storing task dependencies, pointer tagging is used to test for completeness - cell_type data[num_entries]; - - public: - - TaskDependencyManager(std::size_t epoch = 0) : epoch(epoch) { - for(auto& cur : data) cur = nullptr; - } - - ~TaskDependencyManager() { - for(auto& cur : data) { - if (!isDone(cur)) { - // psalz: MSVC 2015 doesn't like deleting atomic pointers - // directly - delete cur.load(); - cur = nullptr; - } - } - } - - TaskDependencyManager(const TaskDependencyManager&) = delete; - TaskDependencyManager(TaskDependencyManager&&) = delete; - - TaskDependencyManager& operator=(const TaskDependencyManager&) = delete; - TaskDependencyManager& operator=(TaskDependencyManager&&) = delete; - - std::size_t getEpoch() const { - return epoch.load(); - } - - void startEpoch(std::size_t newEpoch) { - // make sure 
there is a change - assert_ne(epoch.load(),newEpoch); - - // re-set state - epoch = newEpoch; - for(auto& cur : data) { - // there should not be any dependencies left - assert_true(cur == nullptr || isDone(cur)); - - // reset dependencies - cur = nullptr; - } - } - - - /** - * Adds a dependency between the given tasks such that - * task x depends on the completion of the task y. - */ - void addDependency(TaskBase* x, const TaskPath& y); - - void markComplete(const TaskPath& task); - - bool isComplete(const TaskPath& path) const { - return isDone(data[getPosition(path)]); - } - - private: - - std::size_t getPosition(const TaskPath& path) const { - - // get length and path - auto l = path.getLength(); - auto p = path.getPath(); - - // limit length to max_depth - if (l > max_depth) { - p = p >> (l - max_depth); // effective path - l = max_depth; // effective depth - } - - // compute result - return (1 << l) | p; - } - - bool isDone(const Entry* ptr) const { - // if the last bit is set, the task already finished - return (intptr_t)(ptr) & 0x1; - } - - }; - - - - // --------------------------------------------------------------------------------------------- - // Task Family - // --------------------------------------------------------------------------------------------- - - - /** - * A task family is a collection of tasks descending from a common (single) ancestor. - * Task families are created by root-level prec operator calls, and manage the dependencies - * of all its members. - * - * Tasks being created through recursive or combine calls are initially not members of - * any family, but may get adapted (by being the result of a split operation). 
- */ - class TaskFamily { - - // TODO: make task dependency manager depth target system dependent - - using DependencyManager = TaskDependencyManager<6>; - - // the manager of all dependencies on members of this family - DependencyManager dependencies; - - // a flag determining whether this is a top-level task family - // (it is not created nested by a treeture but by the main thread) - bool top_level; - - public: - - /** - * Creates a new family, using a new ID. - */ - TaskFamily(bool top_level = false) : dependencies(getNextID()), top_level(top_level) {} - - /** - * Obtain the family ID. - */ - std::size_t getId() const { - return dependencies.getEpoch(); - } - - /** - * Tests whether this task family is a top-level family (not nested). - */ - bool isTopLevel() const { - return top_level; - } - - /** - * Tests whether the given sub-task is complete. - */ - bool isComplete(const TaskPath& path) const { - return dependencies.isComplete(path); - } - - /** - * Register a dependency ensuring that a task x is depending on a task y. - */ - void addDependency(TaskBase* x, const TaskPath& y) { - dependencies.addDependency(x,y); - } - - /** - * Mark the given task as being finished. - */ - void markDone(const TaskPath& x) { - dependencies.markComplete(x); - } - - /** - * A family ID generator. - */ - static unsigned getNextID() { - static std::atomic counter(0); - return ++counter; - } - - }; - - - // the pointer type to reference task families - using TaskFamilyPtr = TaskFamily*; - - /** - * A manager keeping track of created families. 
- */ - class TaskFamilyManager { - - SpinLock lock; - - std::vector> families; - - public: - - TaskFamilyPtr getFreshFamily(bool topLevel) { - std::lock_guard lease(lock); - - // TODO: replace this by a re-use based solution - - // gradually drain old family references - /* - if (families.size() > 20000) { - families.erase(families.begin(),families.begin() + families.size()/2); - } - */ - - // create a new family - families.push_back(std::make_unique(topLevel)); - return families.back().get(); - } - - }; - - - // a factory for a new task family - inline TaskFamilyPtr createFamily(bool topLevel = false) { - static TaskFamilyManager familyManager; - return familyManager.getFreshFamily(topLevel); - } - - - - // --------------------------------------------------------------------------------------------- - // task reference - // --------------------------------------------------------------------------------------------- - - - /** - * A reference to a task utilized for managing task synchronization. Tasks may - * only be synchronized on if they are members of a task family. 
- */ - class task_reference { - - // a weak reference to a task's family - TaskFamilyPtr family; - - TaskPath path; - - task_reference(const TaskFamilyPtr& family, const TaskPath& path) - : family(family), path(path) {} - - public: - - task_reference() : family(nullptr), path(TaskPath::root()) {} - - task_reference(const TaskBase& task); - - task_reference(const task_reference&) = default; - - task_reference(task_reference&& other) : family(other.family), path(other.path) { - other.family = nullptr; - } - - task_reference& operator=(const task_reference& other) = default; - - task_reference& operator=(task_reference&& other) { - family = other.family; - path = other.path; - other.family = nullptr; - return *this; - } - - bool isDone() const { - return (!family || family->isComplete(path)); - } - - bool valid() const { - return family; - } - - void wait() const; - - task_reference getLeft() const { - return task_reference ( family, path.getLeftChildPath() ); - } - - task_reference getRight() const { - return task_reference ( family, path.getRightChildPath() ); - } - - task_reference& descentLeft() { - path.descentLeft(); - return *this; - } - - task_reference& descentRight() { - path.descentRight(); - return *this; - } - - // -- implementation details -- - - TaskFamilyPtr getFamily() const { - return family; - } - - const TaskPath& getPath() const { - return path; - } - - }; - - - template - struct fixed_sized {}; - - struct dynamic_sized {}; - - /** - * A class to aggregate task dependencies. - */ - template - class dependencies; - - /** - * A specialization for empty task dependencies. - */ - template<> - class dependencies> { - - public: - - bool empty() const { - return true; - } - - std::size_t size() const { - return 0; - } - - const task_reference* begin() const { - return nullptr; - } - - const task_reference* end() const { - return nullptr; - } - - }; - - - /** - * A specialization for fixed-sized task dependencies. 
- */ - template - class dependencies> { - - template - friend dependencies> concat(const dependencies>&, const dependencies>&); - - std::array list; - - public: - - template - dependencies(const Args& ... args) : list({{args...}}) {} - - dependencies(const dependencies&) = default; - dependencies(dependencies&&) = default; - - dependencies& operator=(const dependencies&) = default; - dependencies& operator=(dependencies&&) = default; - - bool empty() const { - return Size == 0; - } - - std::size_t size() const { - return Size; - } - - const task_reference* begin() const { - return &(list[0]); - } - - const task_reference* end() const { - return begin()+Size; - } - - }; - - /** - * Enables the concatentation of two fixed-sized dependencies lists. - */ - template - dependencies> concat(const dependencies>& a, const dependencies>& b) { - dependencies> res; - for(std::size_t i=0; i - class dependencies { - - using list_type = std::vector; - - list_type* list; - - public: - - dependencies() : list(nullptr) {} - - dependencies(std::vector&& deps) - : list(new list_type(std::move(deps))) {} - - dependencies(const dependencies&) = delete; - - dependencies(dependencies&& other) : list(other.list){ - other.list = nullptr; - } - - ~dependencies() { - delete list; - } - - dependencies& operator=(const dependencies&) = delete; - - dependencies& operator=(dependencies&& other) { - if (list == other.list) return *this; - delete list; - list = other.list; - other.list = nullptr; - return *this; - } - - bool empty() const { - return list == nullptr; - } - - std::size_t size() const { - return (list) ? list->size() : 0; - } - - void add(const task_reference& ref) { - if (!list) list = new list_type(); - list->push_back(ref); - } - - const task_reference* begin() const { - return (list) ? &list->front() : nullptr; - } - - const task_reference* end() const { - return (list) ? 
(&list->back()) + 1 : nullptr; - } - - }; - - - // --------------------------------------------------------------------------------------------- - // promise - // --------------------------------------------------------------------------------------------- - - - /** - * A promise, forming the connection between a task and a treeture - * waiting for the task's result. - */ - template - class Promise { - - // a marker for delivered values - std::atomic ready; - - // the delivered value - T value; - - public: - - Promise() : ready(false) {} - - Promise(const T& value) - : ready(true), value(value) {} - - bool isReady() const { - return ready; - } - - const T& getValue() const { - return value; - } - - void setValue(const T& newValue) { - value = newValue; - ready = true; - } - }; - - /** - * A specialization for void promises. - */ - template<> - class Promise { - - // a marker for delivered promises - std::atomic ready; - - public: - - Promise(bool ready = false) - : ready(ready) {} - - bool isReady() const { - return ready; - } - - void setReady() { - ready = true; - } - - }; - - - template - using PromisePtr = std::shared_ptr>; - - - // --------------------------------------------------------------------------------------------- - // Tasks - // --------------------------------------------------------------------------------------------- - - - // the RT's interface to a task - class TaskBase { - - public: - - enum class State { - New, // < this task has been created, but not processed by a worker yet - Blocked, // < this task has unfinished dependencies - Ready, // < this task may be processed (scheduled in work queues) - Running, // < this task is running - Aggregating, // < this split task is aggregating results (skipped if not split) - Done // < this task is completed - }; - - friend std::ostream& operator<<(std::ostream& out, const State& state) { - switch(state) { - case State::New: return out << "New"; - case State::Blocked: return out << "Blocked"; - case 
State::Ready: return out << "Ready"; - case State::Running: return out << "Running"; - case State::Aggregating: return out << "Aggregating"; - case State::Done: return out << "Done"; - } - return out << "Invalid"; - } - - private: - - // the family this task belongs to, if null, this task is an orphan task. - TaskFamilyPtr family; - - // the position of this task within its family - TaskPath path; - - // A cached version of the task ID. This id - // is only valid if this task is not an orphan - TaskID id; - - // the current state of this task - std::atomic state; - - /** - * the number of active dependencies keeping this object alive and - * blocking its execution. Those dependencies include - * +1 for the unreleased treeture, subtracted once the task is released - * +1 for the parent, released once the parent is no longer interested in this task - * +1 for each task this task is waiting for, thus for each dependency - * - * Initially, there are 2 dependencies -- one for the parent, one for the release. - * - * Actions: - * 1 ... this task is started - * 0 ... this task is destroyed - */ - std::atomic num_active_dependencies; - - // indicates whether this task can be split - bool splitable; - - // split task data - TaskBase* left; - TaskBase* right; - - // for the mutation from a simple to a split task - TaskBase* substitute; - - // TODO: get rid of this - bool parallel; - - // for the processing of split tasks - TaskBase* parent; // < a pointer to the parent to be notified upon completion - std::atomic alive_child_counter; // < the number of active child tasks - - // a flag to remember that this task got a substitute, even after the - // substitute got cut lose - std::atomic substituted; - - public: - - TaskBase(bool done = false) - : family(), path(TaskPath::root()), id(TaskFamily::getNextID()), - state(done ? State::Done : State::New), - // one initial control flow dependency, released by treeture release - num_active_dependencies(done ? 
1 : 2), - splitable(false), - left(nullptr), right(nullptr), substitute(nullptr), - parallel(false), parent(nullptr), - substituted(false) { - - LOG_TASKS( "Created " << *this ); - - // register this task - if (MONITORING_ENABLED) registerTask(*this); - } - - TaskBase(TaskBase* left, TaskBase* right, bool parallel) - : family(), - path(TaskPath::root()), id(TaskFamily::getNextID()), - state(State::New), - // one initial control flow dependency, released by treeture release - num_active_dependencies(2), - splitable(false), - left(left), right(right), substitute(nullptr), - parallel(parallel), - parent(nullptr), alive_child_counter(0), - substituted(false) { - - LOG_TASKS( "Created " << *this ); - assert(this->left); - assert(this->right); - - // fix the parent pointer - this->left->parent = this; - this->right->parent = this; - - // register this task - if (MONITORING_ENABLED) registerTask(*this); - } - - protected: - - // make the destructor private, such that only this class can destroy itself - virtual ~TaskBase() { - if (MONITORING_ENABLED) unregisterTask(*this); - LOG_TASKS( "Destroying Task " << *this ); - assert_true(isDone()) << getId() << " - " << getState(); - }; - - public: - - // -- observers -- - - const TaskFamilyPtr& getTaskFamily() const { - return family; - } - - const TaskPath& getTaskPath() const { - return path; - } - - TaskID getId() const { - return id; - } - - bool isOrphan() const { - return !family; - } - - std::size_t getDepth() const { - return path.getLength(); - } - - State getState() const { - // the substitute takes over the control of the state - if (substitute) return substitute->state; - return state; - } - - // each implementation is required to provide a runtime predictor - virtual RuntimePredictor& getRuntimePredictor() const = 0; - - // -- mutators -- - - void addDependency(const task_reference& ref) { - addDependencies(&ref,&ref+1); - } - - template - void addDependencies(const Iter& begin, const Iter& end) { - - // ignore 
empty dependencies - if (begin == end) return; - - // we must still be in the new state - assert_eq(getState(),State::New); - - // this task must not yet be started nor must the parent be lost - assert_le(2,num_active_dependencies); - - // increase the number of active dependencies - num_active_dependencies += (int)(end - begin); - - // register dependencies - for(auto it = begin; it != end; ++it) { - const auto& cur = *it; - - // filter out already completed tasks (some may be orphans) - if (cur.isDone()) { - // notify that one dependency more is completed - dependencyDone(); - // continue with next - continue; - } - - // add dependency - assert_true(cur.getFamily()); - cur.getFamily()->addDependency(this,cur.getPath()); - } - - } - - void adopt(const TaskFamilyPtr& family, const TaskPath& path = TaskPath()) { - // check that this task is not member of another family - assert_true(isOrphan()) << "Can not adopt a member of another family."; - - // check whether there is an actual family - if (!family) return; - - // join the family - this->family = family; - this->path = path; - - // update the id - this->id = TaskID(family->getId(),path); - - // mark as complete, if already complete - if(isDone()) family->markDone(path); - - // propagate adoption to descendants - if (substitute) substitute->adopt(family,path); - if (left) left->adopt(family, path.getLeftChildPath()); - if (right) right->adopt(family, path.getRightChildPath()); - } - - - // -- state transitions -- - - // New -> Blocked - void start(); - - // Blocked -> Ready transition is triggered by the last dependency - - // Ready -> Running - finish() -> Done - void run() { - - // log this event - auto action = monitoring::log(monitoring::EventType::Run, this); - - // process substituted tasks - if (substituted) { - // there is nothing to do - return; - } - - - LOG_TASKS( "Running Task " << *this ); - - // check that it is allowed to run - assert_eq(state, State::Ready); - assert_eq(1,num_active_dependencies); 
- - // update state - setState(State::Running); - - // process split tasks - if (isSplit()) { // if there is a left, it is a split task - - // check some assumptions - assert(left && right); - - State lState = left->state; - State rState = right->state; - - assert(lState == State::New || lState == State::Done); - assert(rState == State::New || rState == State::Done); - - // run task sequentially if requested - if (!parallel) { - - // TODO: implement sequential execution dependency based - alive_child_counter = 2; - - // process left first - if (lState != State::Done) { - left->start(); - } else { - // notify that this child is done - childDone(*left); - } - - // right child is started by childDone once left is finished - - // done - return; - - } - - // count number of sub-tasks to be started - assert_eq(0,alive_child_counter); - - // check which child tasks need to be started - if (lState == State::New && rState == State::New) { - - // both need to be started - alive_child_counter = 2; - left->start(); - right->start(); - - } else if (lState == State::New) { - - // only left has to be started - alive_child_counter = 1; - left->start(); - - } else if (rState == State::New) { - - // only left has to be started - alive_child_counter = 1; - right->start(); - - } else { - - // perform reduction immediately since sub-tasks are done - finish(); - - // done - return; - } - - // processing complete - - } else { - - // run computation - execute(); - - // finish task - finish(); - - } - } - - // Ready -> Split (if supported, otherwise remains Ready) - virtual bool split() { - // by default, no splitting is supported - assert_fail() << "This should not be reachable!"; - return false; - } - - // wait for the task completion - void wait(); - - bool isDone() const { - // simply check the state of this task - return state == State::Done; - } - - const TaskBase* getLeft() const { - // forward call to substitute if present - if (substitute) return substitute->getLeft(); - return 
left; - } - - const TaskBase* getRight() const { - // forward call to substitute if present - if (substitute) return substitute->getRight(); - return right; - } - - bool isSplitable() const { - return splitable; - } - - bool isSplit() const { - return (bool)left; - } - - bool isSubstituted() const { - return substituted; - } - - bool isReady() const { - if (substitute) return substitute->isReady(); - return state == State::Ready; - } - - void dependencyDone(); - - protected: - - /** - * A hook to define the operations to be conducted by this - * task instance. This function will only be triggered - * for non-split tasks. - */ - virtual void execute() =0; - - /** - * A hook to define post-operation operations triggered after - * the completion of this task or the completion of its child - * tasks. It should be utilized to retrieve results from - * substitutes or child tasks and aggregate those. - */ - virtual void aggregate() =0; - - void setSplitable(bool value = true) { - splitable = value && getDepth() < 60; - } - - void setSubstitute(TaskBase* newSub) { - - // must only be set once! 
- assert_false(substitute); - - // can only happen if this task is in blocked or ready state - assert_true(state == State::Blocked || state == State::Ready) - << "Actual state: " << state; - - // and the substitute must be valid - assert_true(newSub); - - // the substitute must be new - assert_true(newSub->state == State::New || newSub->state == State::Done); - - // link substitute -- with this responsibilities are transfered - substitute = newSub; - - // connect substitute to parent - substitute->parent = this; - - // remember that a substitute has been assigned - substituted = true; - - // if the split task is done, this one is done - if (substitute->isDone()) { - - // update state - if (state == State::Blocked) setState(State::Ready); - - // pass through running - setState(State::Running); - - // finish this task - finish(); - - // done - return; - } - - // adapt substitute - substitute->adopt(this->family, this->path); - - // and update this state to ready - if (state == State::Blocked) setState(State::Ready); - - // since the substitute may be processed any time, this may finish - // any time => thus it is in the running state - setState(State::Running); - - // start the substitute - substitute->start(); - - } - - private: - - bool isValidTransition(State from, State to) { - return (from == State::New && to == State::Blocked ) || - (from == State::Blocked && to == State::Ready ) || - (from == State::Ready && to == State::Running ) || - (from == State::Running && to == State::Aggregating ) || - (from == State::Aggregating && to == State::Done ) ; - } - - void setState(State newState) { - - // check correctness of state transitions - assert_true(isValidTransition(state,newState)) - << "Illegal state transition from " << state << " to " << newState; - - // make sure that the task is not released with active dependencies - assert_true(newState != State::Ready || num_active_dependencies == 1 || substituted) - << "Active dependencies: " << num_active_dependencies; - 
- // update the state - state = newState; - LOG_TASKS( "Updated state: " << *this ); - } - - void childDone(const TaskBase& child) { - - // this task must not be done yet - assert_ne(state,State::Done); - - // check whether it is the substitute - if (substitute == &child) { - - // check state of this task - assert_true(State::Ready == state || State::Running == state) - << "Actual state: " << state; - - // log state change - LOG_TASKS( "Substitute " << *substitute << " of " << *this << " done"); - - // trigger completion of task - finish(); - return; - } - - // make sure this task is still running - assert_eq(State::Running, state) - << "\tis substitute: " << (substitute == &child) << "\n" - << "\tis child left: " << (left == &child) << "\n" - << "\tis child right: " << (right == &child) << "\n"; - - // process a split-child - LOG_TASKS( "Child " << child << " of " << *this << " done" ); - - // if this is a sequential node, start next child - if (!parallel && &child == left) { - - // continue with the right child - if (right->getState() != State::Done) { - right->start(); - } else { - // notify that the right child is also done - childDone(*right); - } - - } - - // decrement active child count - unsigned old_child_count = alive_child_counter.fetch_sub(1); - - // log alive counter - LOG_TASKS( "Child " << child << " of " << *this << " -- alive left: " << (old_child_count - 1) ); - - // check whether this was the last child - if (old_child_count != 1) return; - - // the last child finished => finish this task - finish(); - - // LOG_TASKS( "Child " << child << " of " << *this << " done - processing complete" ); - } - - void parentDone() { - - // check that there is a parent - assert_true(parent); - - // signal that one more dependency is satisfied - dependencyDone(); - - } - - // Running -> Aggregating -> Done - void finish() { - - LOG_TASKS( "Finishing task " << *this ); - - // check precondition - assert_true(state == State::Running) - << "Actual State: " << state 
<< "\nTask: " << *this; - - - // update state to aggregation - setState(State::Aggregating); - - // log aggregation step - LOG( "Aggregating task " << *this ); - - // aggregate result (collect results) - aggregate(); - - // a tool to release dependent tasks - auto release = [](TaskBase* task) { - assert_true(!task || task->isDone()); - if (!task) return; - task->parentDone(); - }; - - // cut lose children - release(left); - release(right); - - // cut lose substitutes - release(substitute); - - // log completion - LOG( "Aggregating task " << *this << " complete" ); - - // job is done - setState(State::Done); - - // copy parent pointer to stack, since the markDone may release this task - TaskBase* locParent = parent; - - // inform the family that the job is done - if (!parent || parent->substitute != this) { - // only due this if you are not the substitute - if (family) family->markDone(path); - - // if there is no parent, don't wait for it to signal its release - if (!parent) dependencyDone(); - } - - // notify parent - if (locParent) { - - // notify parents - parent->childDone(*this); - - } - - } - - // -- support printing of tasks for debugging -- - - friend std::ostream& operator<<(std::ostream& out, const TaskBase& task) { - - // if substituted, print the task and its substitute - if (task.substitute) { - out << task.getId() << " -> " << *task.substitute; - return out; - } - - // if split, print the task and its children - if (task.isSplit()) { - out << task.getId() << " : " << task.state; - if (task.state == State::Done) return out; - - out << " = " << (task.parallel ? 
"parallel" : "sequential") << " ["; - if (task.left) out << *task.left; else out << "nil"; - out << ","; - if (task.right) out << *task.right; else out << "nil"; - out << "]"; - return out; - } - - // in all other cases, just print the id - out << task.getId() << " : " << task.state; - - // get the total number of dependencies - std::size_t numDependencies = task.num_active_dependencies; - - // remove release dependency - if (task.state == State::New) numDependencies -= 1; - - // remove delete dependency - numDependencies -= 1; - - // print number of task dependencies - if (task.state <= State::Blocked) { - out << " waiting for " << numDependencies << " task(s)"; - } - - return out; - } - - template - friend class SplitableTask; - - // --- debugging --- - - private: - - static std::mutex& getTaskRegisterLock() { - static std::mutex lock; - return lock; - } - - static std::set& getTaskRegister() { - static std::set instances; - return instances; - } - - static void registerTask(const TaskBase& task) { - std::lock_guard g(getTaskRegisterLock()); - getTaskRegister().insert(&task); - } - - static void unregisterTask(const TaskBase& task) { - std::lock_guard g(getTaskRegisterLock()); - auto pos = getTaskRegister().find(&task); - assert_true(pos!=getTaskRegister().end()); - getTaskRegister().erase(pos); - } - - public: - - static void dumpAllTasks(std::ostream& out) { - std::lock_guard g(getTaskRegisterLock()); - - // check whether monitoring is enabled - if (!MONITORING_ENABLED) { - out << " -- task tracking disabled, enable by setting MONITORING_ENABLED to true --\n"; - return; - } - - // list active tasks - std::cout << "List of all tasks:\n"; - for(const auto& cur : getTaskRegister()) { - std::cout << "\t" << *cur << "\n"; - } - } - - }; - - - // ----------- Task Dependency Manager Implementations --------------- - - template - void TaskDependencyManager::addDependency(TaskBase* x, const TaskPath& y) { - - // locate entry - std::size_t pos = getPosition(y); - - // 
load epoch - auto curEpoch = epoch.load(); - - // load the head - Entry* head = data[pos].load(); - - // check whether we are still in the same epoch - if (curEpoch != epoch.load()) { - // the epoch has changed, the previous is gone - x->dependencyDone(); - return; - } - - // check whether this task is already completed - if (isDone(head)) { - // signal that this dependency is done - x->dependencyDone(); - return; - } - - // insert element - Entry* entry = new Entry(); - entry->task = x; - entry->next = head; - - // update entry pointer lock-free - while (!data[pos].compare_exchange_weak(entry->next,entry)) { - - // check whether the task has been completed in the meanwhile - if (isDone(entry->next)) { - delete entry; - // signal that this dependency is done - x->dependencyDone(); - return; - } - - // otherwise, repeat until it worked - } - - // successfully inserted - } - - template - void TaskDependencyManager::markComplete(const TaskPath& task) { - - // ignore tasks that are too small - if (task.getLength() > max_depth) return; - - // mark as complete and obtain head of depending list - auto pos = getPosition(task); - Entry* cur = data[pos].exchange((Entry*)0x1); - - // do not process list twice (may be called multiple times due to substitutes) - if (isDone(cur)) return; - - // signal the completion of this task - while(cur) { - - // signal a completed dependency - cur->task->dependencyDone(); - - // move on to next entry - Entry* next = cur->next; - delete cur; - cur = next; - } - - // and its children - if (pos >= num_entries/2) return; - markComplete(task.getLeftChildPath()); - markComplete(task.getRightChildPath()); - } - - // ------------------------------------------------------------------- - - - - // ------------------------- Task Reference -------------------------- - - inline task_reference::task_reference(const TaskBase& task) - : family(task.getTaskFamily()), path(task.getTaskPath()) { - assert_false(task.isOrphan()) << "Unable to reference an orphan 
task!"; - } - - // ------------------------------------------------------------------- - - - // a task computing a value of type T - template - class Task : public TaskBase { - - T value; - - mutable PromisePtr promise; - - public: - - Task() : TaskBase(), promise(nullptr) {} - - Task(const T& value) - : TaskBase(true), value(value), promise(nullptr) {} - - Task(TaskBase* left, TaskBase* right, bool parallel) - : TaskBase(left, right, parallel), promise(nullptr) {} - - - virtual ~Task(){}; - - const T& getValue() const { - assert_true(isDone()) << this->getState(); - return value; - } - - void setPromise(const PromisePtr& newPromise) const { - - // this task must not be started yet - assert_eq(State::New,this->getState()); - - // there must not be a previous promise - assert_false(promise); - - // register promise - promise = newPromise; - } - - protected: - - void execute() override { - value = computeValue(); - } - - void aggregate() override { - value = computeAggregate(); - if(promise) { - promise->setValue(value); - } - } - - virtual T computeValue() { - // the default does nothing - return value; - }; - - virtual T computeAggregate() { - // nothing to do by default - return value; - }; - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - template<> - class Task : public TaskBase { - - mutable PromisePtr promise; - - public: - - Task(bool done = false) : TaskBase(done) {} - - Task(TaskBase* left, TaskBase* right, bool parallel) - : TaskBase(left,right,parallel) {} - - virtual ~Task(){}; - - void getValue() const { - } - - void setPromise(const PromisePtr& newPromise) const { - - // this task must not be started yet - assert_eq(State::New,this->getState()); - - // there must not be a previous promise - assert_false(promise); - - // register promise - promise = newPromise; - } - - protected: - - 
void execute() override { - computeValue(); - } - - void aggregate() override { - computeAggregate(); - if(promise) { - promise->setReady(); - } - } - - virtual void computeValue() {}; - - virtual void computeAggregate() {}; - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - - template< - typename Process, - typename R = std::result_of_t - > - class SimpleTask : public Task { - - Process task; - - public: - - SimpleTask(const Process& task) - : Task(), task(task) {} - - R computeValue() override { - return task(); - } - - virtual RuntimePredictor& getRuntimePredictor() const override { - return reference::getRuntimePredictor(); - } - - }; - - - template< - typename Process, - typename Split, - typename R = std::result_of_t - > - class SplitableTask : public Task { - - Process task; - Split decompose; - - Task* subTask; - - public: - - SplitableTask(const Process& c, const Split& d) - : Task(), task(c), decompose(d), subTask(nullptr) { - // mark this task as one that can be split - TaskBase::setSplitable(); - } - - R computeValue() override { - // this should not be called if split - assert_false(subTask); - return task(); - } - - R computeAggregate() override { - // the aggregated value depends on whether it was split or not - return (subTask) ? 
subTask->getValue() : Task::computeAggregate(); - } - - bool split() override; - - virtual RuntimePredictor& getRuntimePredictor() const override { - return reference::getRuntimePredictor(); - } - - }; - - template - class SplitTask : public Task { - - const Task& left; - const Task& right; - - C merge; - - public: - - SplitTask(Task* left, Task* right, C&& merge, bool parallel) - : Task(left,right,parallel), - left(*left), - right(*right), - merge(merge) {} - - - R computeValue() override { - // should not be reached - assert_fail() << "Should always be split!"; - return {}; - } - - R computeAggregate() override { - return merge(left.getValue(),right.getValue()); - } - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - template - class SplitTask : public Task { - public: - - SplitTask(TaskBase* left, TaskBase* right, bool parallel) - : Task(left,right,parallel) {} - - void computeValue() override { - // should not be reached - assert_fail() << "Should always be split!"; - } - - void computeAggregate() override { - // nothing to do - } - - virtual RuntimePredictor& getRuntimePredictor() const override { - assert_fail() << "Should not be reachable, predictions only intresting for splitable tasks!"; - return reference::getRuntimePredictor(); - } - }; - - template> - Task* make_split_task(Deps&& deps, Task* left, Task* right, C&& merge, bool parallel) { - Task* res = new SplitTask(left, right, std::move(merge), parallel); - res->addDependencies(deps.begin(), deps.end()); - return res; - } - - template - Task* make_split_task(Deps&& deps, TaskBase* left, TaskBase* right, bool parallel) { - Task* res = new SplitTask(left, right, parallel); - res->addDependencies(deps.begin(), deps.end()); - return res; - } - - - - - - // 
--------------------------------------------------------------------------------------------- - // Treetures - // --------------------------------------------------------------------------------------------- - - - namespace detail { - - /** - * A common base class for all treetures, providing common functionality. - */ - template - class treeture_base { - - template - friend class SplitableTask; - - protected: - - task_reference taskRef; - - PromisePtr promise; - - treeture_base() : promise() {} - - treeture_base(const Task& task) : promise(std::make_shared>()) { - - // make sure task has not been started yet - assert_eq(TaskBase::State::New, task.getState()); - - // register the promise - task.setPromise(promise); - - // also create task reference if available - if (!task.isOrphan()) { - taskRef = task_reference(task); - } - } - - treeture_base(PromisePtr&& promise) - : promise(std::move(promise)) { - - // make sure the promise is valid and set - assert_true(this->promise); - assert_true(this->promise->isReady()); - - } - - public: - - using value_type = T; - - treeture_base(const treeture_base&) = delete; - treeture_base(treeture_base&& other) = default; - - treeture_base& operator=(const treeture_base&) = delete; - treeture_base& operator=(treeture_base&& other) = default; - - void wait() const; - - bool isDone() const { - return !promise || promise->isReady(); - } - - bool isValid() const { - return (bool)promise; - } - - task_reference getLeft() const { - return getTaskReference().getLeft(); - } - - task_reference getRight() const { - return getTaskReference().getRight(); - } - - task_reference getTaskReference() const { - return taskRef; - } - - operator task_reference() const { - return getTaskReference(); - } - - }; - - } - - /** - * A treeture, providing a reference to the state of a task as well as to - * the computed value upon completion. 
- */ - template - class treeture : public detail::treeture_base { - - using super = detail::treeture_base; - - friend class unreleased_treeture; - - protected: - - treeture(const Task& task) : super(task) {} - - public: - - using treeture_type = treeture; - - treeture() {} - - treeture(const T& value) - : super(std::make_shared>(value)) {} - - treeture(const treeture&) = delete; - treeture(treeture&& other) = default; - - treeture& operator=(const treeture&) = delete; - treeture& operator=(treeture&& other) = default; - - const T& get() { - static const T defaultValue = T(); - if (!this->promise) return defaultValue; - super::wait(); - return this->promise->getValue(); - } - - }; - - /** - * A specialization of the general value treeture for the void type, exhibiting - * a modified signature for the get() member function. - */ - template<> - class treeture : public detail::treeture_base { - - using super = detail::treeture_base; - - friend class unreleased_treeture; - - protected: - - treeture(const Task& task) : super(task) {} - - public: - - treeture() : super() {} - - treeture(const treeture&) = delete; - treeture(treeture&& other) = default; - - treeture& operator=(const treeture&) = delete; - treeture& operator=(treeture&& other) = default; - - void get() { - wait(); - } - - }; - - - - template - bool SplitableTask::split() { - // do not split a second time - if (!TaskBase::isSplitable()) return false; - - assert_true(TaskBase::State::Blocked == this->state || TaskBase::State::Ready == this->state) - << "Actual state: " << this->state; - - // decompose this task - Task* substitute = decompose().toTask(); - assert_true(substitute); - assert_true(substitute->state == TaskBase::State::New || substitute->state == TaskBase::State::Done); - - // record reference to sub-task - subTask = substitute; - - // mark as no longer splitable - TaskBase::setSplitable(false); - - // mutate to new task - Task::setSubstitute(substitute); - - // done - return true; - } - - - - // 
--------------------------------------------------------------------------------------------- - // Unreleased Treetures - // --------------------------------------------------------------------------------------------- - - namespace detail { - - template - struct done_task_to_treeture { - treeture operator()(const Task& task) { - return treeture(task.getValue()); - } - }; - - template<> - struct done_task_to_treeture { - treeture operator()(const Task&) { - return treeture(); - } - }; - } - - - /** - * A handle to a yet unreleased task. - */ - template - class unreleased_treeture { - - Task* task; - - public: - - using value_type = T; - - using treeture_type = treeture; - - unreleased_treeture(Task* task) - : task(task) {} - - unreleased_treeture(const unreleased_treeture&) =delete; - - unreleased_treeture(unreleased_treeture&& other) : task(other.task) { - other.task = nullptr; - } - - unreleased_treeture& operator=(const unreleased_treeture&) =delete; - - unreleased_treeture& operator=(unreleased_treeture&& other) { - std::swap(task,other.task); - return *this; - } - - ~unreleased_treeture() { - if(task) { - assert_fail() - << "Did you forget to release a treeture?"; - } - } - - treeture release() && { - - // there has to be a task - assert_true(task); - - // special case for completed tasks - if (task->isDone()) { - auto res = detail::done_task_to_treeture()(*task); - task->dependencyDone(); // remove one dependency for the lose of the owner - task = nullptr; - return res; - } - - // the referenced task has not been released yet - assert_eq(TaskBase::State::New,task->getState()); - - // create the resulting treeture - treeture res(*task); - - // start the task -- the actual release - task->start(); - - // reset the task pointer - task = nullptr; - - // return the resulting treeture - return res; - } - - operator treeture() && { - return std::move(*this).release(); - } - - T get() && { - return std::move(*this).release().get(); - } - - Task* toTask() && { - auto 
res = task; - task = nullptr; - return res; - } - - }; - - - - // --------------------------------------------------------------------------------------------- - // Operators - // --------------------------------------------------------------------------------------------- - - - - inline dependencies> after() { - return dependencies>(); - } - - template - auto after(const task_reference& r, const Rest& ... rest) { - return dependencies>(r,rest...); - } - - inline dependencies after(std::vector&& refs) { - return std::move(refs); - } - - - template - unreleased_treeture done(dependencies&& deps) { - auto res = new Task(true); - res->addDependencies(deps.begin(),deps.end()); - return res; - } - - inline unreleased_treeture done() { - return done(after()); - } - - template - unreleased_treeture done(dependencies&& deps, const T& value) { - auto res = new Task(value); - res->addDependencies(deps.begin(),deps.end()); - return res; - } - - template - unreleased_treeture done(const T& value) { - return done(after(),value); - } - - namespace runtime { - - // determines whether this thread is running in a nested context - bool isNestedContext(); - - } - - namespace detail { - - template - unreleased_treeture init(Deps&& deps, Task* task) { - - // add dependencies - task->addDependencies(deps.begin(),deps.end()); - - // create task family if requested - if (root) { - task->adopt(createFamily(!runtime::isNestedContext())); - } - - // done - return task; - } - - } - - - template> - unreleased_treeture spawn(dependencies&& deps, Action&& op) { - // create and initialize the task - return detail::init(std::move(deps), (Task*)(new SimpleTask(std::move(op)))); - } - - template - auto spawn(Action&& op) { - return spawn(after(),std::move(op)); - } - - template> - unreleased_treeture spawn(Deps&& deps, Action&& op, Split&& split) { - // create and initialize the task - return detail::init(std::move(deps), (Task*)(new SplitableTask(std::move(op),std::move(split)))); - } - - template 
- auto spawn(Action&& op, Split&& split) { - return spawn(after(),std::move(op),std::move(split)); - } - - template - unreleased_treeture seq(Deps&& deps) { - return done(std::move(deps)); - } - - inline unreleased_treeture seq() { - return done(); - } - - template - unreleased_treeture seq(dependencies&& deps, unreleased_treeture&& a, unreleased_treeture&& b) { - return make_split_task(std::move(deps),std::move(a).toTask(),std::move(b).toTask(),false); - } - - template - unreleased_treeture seq(unreleased_treeture&& a, unreleased_treeture&& b) { - return seq(after(),std::move(a),std::move(b)); - } - - template - unreleased_treeture seq(dependencies&& deps, unreleased_treeture&& f, unreleased_treeture&& ... rest) { - // TODO: conduct a binary split to create a balanced tree - return make_split_task(std::move(deps),std::move(f).toTask(),seq(std::move(rest)...).toTask(),false); - } - - template - unreleased_treeture seq(unreleased_treeture&& f, unreleased_treeture&& ... rest) { - return seq(after(), std::move(f),std::move(rest)...); - } - - template - unreleased_treeture par(Deps&& deps) { - return done(std::move(deps)); - } - - inline unreleased_treeture par() { - return done(); - } - - template - unreleased_treeture par(dependencies&& deps, unreleased_treeture&& a, unreleased_treeture&& b) { - return make_split_task(std::move(deps),std::move(a).toTask(),std::move(b).toTask(),true); - } - - template - unreleased_treeture par(unreleased_treeture&& a, unreleased_treeture&& b) { - return par(after(),std::move(a),std::move(b)); - } - - template - unreleased_treeture par(dependencies&& deps, unreleased_treeture&& f, unreleased_treeture&& ... rest) { - // TODO: conduct a binary split to create a balanced tree - return make_split_task(std::move(deps),std::move(f).toTask(),par(std::move(deps),std::move(rest)...).toTask(),true); - } - - template - unreleased_treeture par(unreleased_treeture&& f, unreleased_treeture&& ... 
rest) { - return par(after(), std::move(f),std::move(rest)...); - } - - - - template> - unreleased_treeture combine(dependencies&& deps, unreleased_treeture&& a, unreleased_treeture&& b, M&& m, bool parallel = true) { - return make_split_task(std::move(deps),std::move(a).toTask(),std::move(b).toTask(),std::move(m),parallel); - } - - template> - unreleased_treeture combine(unreleased_treeture&& a, unreleased_treeture&& b, M&& m, bool parallel = true) { - return reference::combine(after(),std::move(a),std::move(b),std::move(m),parallel); - } - - - // --------------------------------------------------------------------------------------------- - // Runtime - // --------------------------------------------------------------------------------------------- - - namespace runtime { - - - - // ----------------------------------------------------------------- - // Worker Pool - // ----------------------------------------------------------------- - - class Worker; - - thread_local static Worker* tl_worker = nullptr; - - static void setCurrentWorker(Worker& worker) { - tl_worker = &worker; - } - - static Worker& getCurrentWorker(); - - namespace detail { - - /** - * A utility to fix the affinity of the current thread to the given core. - * Does not do anything on operating systems other than linux. 
- */ - #ifdef __linux__ - inline void fixAffinity(int core) { - // fix affinity if user does not object - if(std::getenv("NO_AFFINITY") == nullptr) { - int num_cores = std::thread::hardware_concurrency(); - cpu_set_t mask; - CPU_ZERO(&mask); - CPU_SET(core % num_cores, &mask); - pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask); - } - } - #else - inline void fixAffinity(int) { } - #endif - - } - - class WorkerPool; - - - class Worker { - - using duration = RuntimePredictor::duration; - - // the targeted maximum queue length - // (more like a guideline, may be exceeded due to high demand) - enum { max_queue_length = 8 }; - - WorkerPool& pool; - - volatile bool alive; - - // list of tasks ready to run - OptimisticUnboundQueue queue; - - std::thread thread; - - unsigned id; - - // the list of workers to attempt to steel from, in order - std::vector stealingOrder; - - public: - - Worker(WorkerPool& pool, unsigned id) - : pool(pool), alive(true), id(id) { } - - Worker(const Worker&) = delete; - Worker(Worker&&) = delete; - - Worker& operator=(const Worker&) = delete; - Worker& operator=(Worker&&) = delete; - - void start() { - thread = std::thread([&](){ run(); }); - } - - void poison() { - alive = false; - } - - void join() { - thread.join(); - } - - void dumpState(std::ostream& out) const { - out << "Worker " << id << " / " << thread.get_id() << ":\n"; - out << "\tQueue:\n"; - for(const auto& cur : queue.getSnapshot()) { - out << "\t\t" << *cur << "\n"; - } - } - - private: - - void run(); - - void runTask(TaskBase& task); - - bool splitTask(TaskBase& task); - - duration estimateRuntime(const TaskBase& task) { - return task.getRuntimePredictor().predictTime(task.getDepth()); - } - - public: - - void schedule(TaskBase& task); - - bool schedule_step(); - - }; - - class WorkerPool { - - std::vector workers; - - // tools for managing idle threads - std::mutex m; - std::condition_variable cv; - - public: - - WorkerPool() { - - int numWorkers = 
std::thread::hardware_concurrency(); - - // parse environment variable - if (char* val = std::getenv("NUM_WORKERS")) { - auto userDef = std::atoi(val); - if (userDef != 0) numWorkers = userDef; - } - - // there must be at least one worker - if (numWorkers < 1) numWorkers = 1; - - // create workers - for(int i=0; istart(); - } - - // make worker 0 being linked to the main thread - setCurrentWorker(*workers.front()); - - // fix affinity of main thread - detail::fixAffinity(0); - - // fix worker id of main thread - setCurrentWorkerID(0); - - } - - ~WorkerPool() { - // shutdown threads - - { - // poison all workers - std::lock_guard guard(m); - for(auto& cur : workers) { - cur->poison(); - } - - // make work available - workAvailable(); - } - - // wait for their death - for(std::size_t i=1; ijoin(); - } - - // free resources - for(auto& cur : workers) { - delete cur; - } - - } - - static WorkerPool& getInstance() { - static WorkerPool pool; - return pool; - } - - int getNumWorkers() const { - return (int)workers.size(); - } - - private: - - mutable std::size_t initialLimit = std::numeric_limits::max(); - - public: - - std::size_t getInitialSplitDepthLimit() const { - if (initialLimit == std::numeric_limits::max()) { - std::size_t i = 0; - auto num_workers = getNumWorkers(); - while ((1<& getWorkers() const { - return workers; - } - - Worker& getWorker() { - return getWorker(0); - } - - void dumpState(std::ostream& out) { - for(const auto& cur : workers) { - cur->dumpState(out); - } - } - - protected: - - friend Worker; - - void waitForWork(volatile bool& alive) { - std::unique_lock lk(m); - if (!alive) return; - LOG_SCHEDULE("Going to sleep"); - cv.wait(lk); - LOG_SCHEDULE("Woken up again"); - } - - void workAvailable() { - // wake up all workers - cv.notify_all(); - } - - }; - - static Worker& getCurrentWorker() { - if (tl_worker) return *tl_worker; - return WorkerPool::getInstance().getWorker(); - } - - inline void Worker::run() { - - // fix worker ID - 
setCurrentWorkerID(id); - - // copy worker list - auto allWorkers = pool.getWorkers(); - - // a utility to add new steel targets - auto addStealTarget = [&](std::size_t idx) { - if (idx == id) return; - stealingOrder.push_back(allWorkers[idx]); - }; - - // create list of workers to steel from - auto numWorkers = allWorkers.size(); - for(std::size_t d=1; d 100000) { - - // report sleep event - logProfilerEvent(ProfileLogEntry::createWorkerSuspendedEntry()); - - // wait for work by putting thread to sleep - pool.waitForWork(alive); - - // report awakening - logProfilerEvent(ProfileLogEntry::createWorkerResumedEntry()); - - // reset cycles counter - idle_cycles = 0; - } - } - } - - // log worker termination event - logProfilerEvent(ProfileLogEntry::createWorkerDestroyedEntry()); - - // done - - } - - inline bool& getIsNestedFlag() { - static thread_local bool nested = false; - return nested; - } - - inline bool isNestedContext() { - return getIsNestedFlag(); - } - - inline void Worker::runTask(TaskBase& task) { - - // the splitting of a task may provide a done substitute => skip those - if (task.isDone()) return; - - LOG_SCHEDULE("Starting task " << task); - - // no substituted task may be processed - assert_false(task.isSubstituted()); - - // make sure this is a ready task - assert_eq(TaskBase::State::Ready,task.getState()); - - // mark as nested - bool& nestedContextFlag = getIsNestedFlag(); - bool old = nestedContextFlag; - nestedContextFlag = true; - - // process the task - if (task.isSplit()) { - task.run(); - } else { - - __allscale_unused auto taskId = task.getId(); - logProfilerEvent(ProfileLogEntry::createTaskStartedEntry(taskId)); - - // check whether this run needs to be sampled - auto level = task.getDepth(); - if (level == 0) { - - // level 0 does not need to be recorded (orphans) - task.run(); - - } else { - - // get predictor before task by be gone (as part of the processing) - RuntimePredictor& predictor = task.getRuntimePredictor(); - - // take the 
time to make predictions - auto start = RuntimePredictor::clock::now(); - task.run(); - auto time = RuntimePredictor::clock::now() - start; - - predictor.registerTime(level,time); - - } - - logProfilerEvent(ProfileLogEntry::createTaskEndedEntry(taskId)); - - } - - // reset old nested context state - nestedContextFlag = old; - - LOG_SCHEDULE("Finished task " << task); - } - - inline bool Worker::splitTask(TaskBase& task) { - using namespace std::chrono_literals; - - // the threshold for estimated task to be split - static const auto taskTimeThreshold = CycleCount(3*1000*1000); - - // only split the task if it is estimated to exceed a threshold - if (task.isSplitable() && (task.getDepth() == 0 || estimateRuntime(task) > taskTimeThreshold)) { - - // split this task - return task.split(); - - } - - // no split happend - return false; - } - - inline void Worker::schedule(TaskBase& task) { - - // assert that task has no unfinished dependencies - assert_true(task.isReady()); - - // no task that is substituted shall be scheduled - assert_false(task.isSubstituted()); - - - // actively distribute initial tasks, by assigning them to different workers - - // TODO: do the following only for top-level tasks!! - - if (!task.isOrphan() && task.getTaskFamily()->isTopLevel()) { - - // get the limit for initial decomposition - auto split_limit = pool.getInitialSplitDepthLimit(); - - // if below this limit, split the task - if (task.isSplitable() && task.getDepth() < split_limit) { - - // if splitting worked => we are done - if (task.split()) return; - - } - - // the depth limit for task being actively distributed - auto distribution_limit = split_limit + 2; - - // actively distribute tasks throughout the pool - if (task.getDepth() < distribution_limit) { - - // actively select the worker to issue the task to - std::size_t num_workers = pool.getNumWorkers(); - auto path = task.getTaskPath().getPath(); - auto depth = task.getDepth(); - - auto trgWorker = (depth==0) ? 
0 : (path * num_workers) / ((uint64_t)1 << depth); - - // check the computation of the target worker - assert_lt(trgWorker,(std::size_t)pool.getNumWorkers()) - << "Error in target worker computation:\n" - << "\tNumWorkers: " << num_workers << "\n" - << "\tPath: " << path << "\n" - << "\tDepth: " << depth << "\n" - << "\tTarget: " << trgWorker << "\n"; - - - // if the target is another worker => send the task there - if (trgWorker != id) { - - // submit this task to the selected worker - pool.getWorker((int)trgWorker).schedule(task); - - // done - return; - - } - } - } - - // add task to queue - LOG_SCHEDULE( "Queue size before: " << queue.size() ); - - // no task that is substituted shall be scheduled - assert_false(task.isSubstituted()); - - // add task to queue - queue.push_back(&task); - - // signal available work - pool.workAvailable(); - - // log new queue length - LOG_SCHEDULE( "Queue size after: " << queue.size() ); - - } - - - inline bool Worker::schedule_step() { - - // process a task from the local queue - if (TaskBase* t = queue.pop_front()) { - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - // check precondition of task - assert_true(t->isReady()) << "Actual state: " << t->getState(); - - // if the queue is not full => create more tasks - if (queue.size() < (max_queue_length*3)/4) { - - LOG_SCHEDULE( "Splitting tasks @ queue size: " << queue.size() ); - - - - // split task and be done - if (splitTask(*t)) return true; - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - } - - // process this task - runTask(*t); - return true; - } - - // look through potential targets to steel a task - for(const auto& cur : stealingOrder) { - - // otherwise, steal a task from another worker - Worker& other = *cur; - - // try to steal a task from another queue - if (TaskBase* t = other.queue.try_pop_back()) { - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - // log 
creation of worker event - logProfilerEvent(ProfileLogEntry::createTaskStolenEntry(t->getId())); - - LOG_SCHEDULE( "Stolen task: " << t ); - - // split task the task (since there is not enough work in the queue) - if (splitTask(*t)) return true; - - // the task should not have a substitute - assert_false(t->isSubstituted()); - - // process task - runTask(*t); - return true; // successfully completed a task - } - - } - - // no task found => wait a moment - cpu_relax(); - - // report back the failed steal attempt - return false; - } - - } - - namespace monitoring { - - inline std::ostream& operator<<(std::ostream& out, const Event& e) { - switch(e.type) { - case EventType::Run: return out << "Running task " << *e.task; - case EventType::RunDirect: return out << "Running direct task " << *e.task; - case EventType::Split: return out << "Splitting task " << *e.task; - case EventType::Wait: return out << "Waiting for task " << *e.task; - case EventType::DependencyWait: return out << "Waiting for dependency: " << e.taskId; - } - return out << "Unknown Event"; - } - - }// end namespace monitoring - - - inline void TaskBase::start() { - LOG_TASKS("Starting " << *this ); - - // check that the given task is a new task - assert_eq(TaskBase::State::New, state); - - // move to next state - setState(State::Blocked); - - // if below the initial split limit, split this task - if (!isOrphan() && getTaskFamily()->isTopLevel() && isSplitable() && getDepth() < runtime::WorkerPool::getInstance().getInitialSplitDepthLimit()) { - - // attempt to split this task - split(); - - } - - // release dummy-dependency to get task started - dependencyDone(); - } - - inline void TaskBase::dependencyDone() { - - // keep a backup in case the object is destroyed asynchronously - auto substitutedLocalCopy = substituted.load(); - - // decrease the number of active dependencies - int oldValue = num_active_dependencies.fetch_sub(1); - - // compute the new value - int newValue = oldValue - 1; - - // make 
sure there are no releases that should not be - assert_le(0,newValue); - - // if we are down to 0 => destroy this task - if (newValue == 0) { - - // at this point this task must be done - assert_eq(State::Done,state); - - // destroy this object, and be done - delete this; - return; - } - - // if the new value is not 1 => ignore - if (newValue != 1) return; - - // if the value is 1, we release this task for computation - assert_eq(1,newValue); - - // handle substituted instances by ignoring the message - if (substitutedLocalCopy || substituted) return; - - // make sure that at this point there is still a parent left - assert_eq(num_active_dependencies, 1); - - // at this point the state must not be new - assert_ne(State::New, state) - << "A new task must not reach a state where its last dependency is released."; - - // actually, every task here must be in blocked state - assert_eq(State::Blocked, state) << *this << "\t" << substitutedLocalCopy << "\n"; - - // update the state to ready - // (this can only be reached by one thread) - setState(State::Ready); - - // schedule task - runtime::getCurrentWorker().schedule(*this); - - } - - inline void TaskBase::wait() { - // log this event - // auto action = monitoring::log(monitoring::EventType::Wait, this); - - LOG_TASKS("Waiting for " << *this ); - - // check that this task has been started before - assert_lt(State::New,state); - - // wait until this task is finished - while(!isDone()) { - // make some progress - runtime::getCurrentWorker().schedule_step(); - } - } - - inline void task_reference::wait() const { - // log this event - // auto action = monitoring::log(monitoring::EventType::DependencyWait, TaskID(family->getId(),path)); - - // wait until the referenced task is done - while(!isDone()) { - // but while doing so, do useful stuff - runtime::getCurrentWorker().schedule_step(); - } - } - - namespace detail { - - template - void treeture_base::wait() const { - // wait for completion - while (promise && 
!promise->isReady()) { - // make some progress - runtime::getCurrentWorker().schedule_step(); - } - } - - } - -} // end namespace reference -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale - - -inline void __dumpRuntimeState() { - std::cout << "\n ------------------------- Runtime State Dump -------------------------\n"; - allscale::api::core::impl::reference::monitoring::ThreadState::dumpStates(std::cout); - allscale::api::core::impl::reference::runtime::WorkerPool::getInstance().dumpState(std::cout); - allscale::api::core::impl::reference::TaskBase::dumpAllTasks(std::cout); - std::cout << "\n ----------------------------------------------------------------------\n"; -} diff --git a/vendor/allscale/api/core/impl/sequential/treeture.h b/vendor/allscale/api/core/impl/sequential/treeture.h deleted file mode 100644 index c40e0ac4c..000000000 --- a/vendor/allscale/api/core/impl/sequential/treeture.h +++ /dev/null @@ -1,335 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/printer/arrays.h" - -namespace allscale { -namespace api { -namespace core { -namespace impl { -namespace sequential { - - - // -------------------------------------------------------------------------------------------- - // sequential treeture implementation - // -------------------------------------------------------------------------------------------- - - - // ------------------------------------- Declarations ----------------------------------------- - - /** - * The actual treeture, referencing the computation of a value. - */ - template - class treeture; - - /** - * A treeture not yet released to the runtime system for execution. - */ - template - class unreleased_treeture; - - /** - * A handle for a lazily constructed unreleased treeture. 
This intermediate construct is utilized - * for writing templated code that can be optimized to overhead-less computed values and to facilitate - * the support of the sequence combinator. - */ - template - class lazy_unreleased_treeture; - - /** - * A class to reference tasks for synchronization purposes. - */ - class task_reference; - - /** - * A class to model task dependencies - */ - class dependencies; - - - // ------------------------------------- Definitions ------------------------------------------ - - // -- task_reference -- - - class task_reference { - - bool isDone() const { - return true; - } - - void wait() const { - // always done - } - - task_reference& descentLeft() { - return *this; - } - - task_reference& descentRight() { - return *this; - } - - task_reference getLeft() const { - return *this; - } - - task_reference getRight() const { - return *this; - } - - }; - - - // -- treeture -- - - template<> - class treeture : public task_reference { - public: - - using value_type = void; - - treeture() {} - - template - explicit treeture(Fun&& fun) { - fun(); - } - - template - treeture(const treeture& /*other*/) {} - - void get() const { - // nothing to do - } - - }; - - template - class treeture : public task_reference { - - T value; - - public: - - using value_type = T; - - using treeture_type = treeture; - - treeture() {} - - treeture(const T& value) - : value(value) {} - - treeture(const T&& value) - : value(std::move(value)) {} - - template - explicit treeture(Fun&& fun) - : value(fun()) {} - - T get() const { - return value; - } - - }; - - - template> - treeture make_treeture(Op&& op) { - return treeture(std::move(op)); - } - - // -- unreleased_treeture -- - - template - class unreleased_treeture : public task_reference { - - treeture res; - - public: - - using value_type = T; - - using treeture_type = treeture; - - unreleased_treeture() {} - - template - explicit unreleased_treeture(Fun&& fun) - : res(fun()) {} - - unreleased_treeture(const 
unreleased_treeture&) =delete; - unreleased_treeture(unreleased_treeture&&) =default; - - unreleased_treeture& operator=(const unreleased_treeture&) =delete; - unreleased_treeture& operator=(unreleased_treeture&&) =default; - - treeture release() const && { - return res; - } - - operator treeture() const && { - return std::move(*this).release(); - } - - T get() const && { - return std::move(*this).release().get(); - } - - }; - - template::value_type> - unreleased_treeture make_unreleased_treeture(Gen&& gen) { - return unreleased_treeture(std::move(gen)); - } - - template - class lazy_unreleased_treeture { - - mutable Gen gen; - - public: - - using value_type = T; - - using treeture_type = treeture; - - explicit lazy_unreleased_treeture(Gen&& gen) - : gen(std::move(gen)) {} - - unreleased_treeture toUnreleasedTreeture() const { - return gen(); - } - - treeture release() const { - return toUnreleasedTreeture(); - } - - T get() const { - return release().get(); - } - - operator unreleased_treeture() const { - return toUnreleasedTreeture(); - } - - operator treeture() const { - return release(); - } - - }; - - template::value_type> - lazy_unreleased_treeture make_lazy_unreleased_treeture(Gen&& gen) { - return lazy_unreleased_treeture(std::move(gen)); - } - - /** - * There are no dependencies to be recorded, so this object is an empty object. - */ - class dependencies {}; - - - // -------------------------------------- Operators ------------------------------------------- - - - inline dependencies after() { - return {}; - } - - template - dependencies after(const task_reference&, const Rest& ... 
rest) { - return after(rest...); - } - - inline dependencies after(const std::vector&) { - return {}; // if it is a task_reference, it is computed - } - - - inline auto done() { - return make_lazy_unreleased_treeture([=](){ - return make_unreleased_treeture([=](){ return treeture(); }); - }); - } - - template - auto done(const T& value) { - return make_lazy_unreleased_treeture([=](){ - return make_unreleased_treeture([=](){ return treeture(value); }); - }); - } - - - template - auto spawn(dependencies&&, Op&& op) { - return make_lazy_unreleased_treeture([=](){ - return make_unreleased_treeture([=](){ return make_treeture(std::move(op)); }); - }); - } - - template - auto spawn(Op&& op) { - return spawn(after(),std::move(op)); - } - - - inline auto seq() { - return done(); - } - - template - auto seq(dependencies&&, lazy_unreleased_treeture&& f, lazy_unreleased_treeture&& ... rest) { - return make_lazy_unreleased_treeture([f,rest...]() mutable { - return make_unreleased_treeture([f,rest...]() mutable { - return make_treeture([f,rest...]() mutable { - f.get(); - seq(std::move(rest)...).get(); - }); - }); - }); - } - - template - auto seq(lazy_unreleased_treeture&& f, lazy_unreleased_treeture&& ... rest) { - return seq(after(), std::move(f),std::move(rest)...); - } - - template - auto par(dependencies&&, lazy_unreleased_treeture&& ... tasks) { - // for the sequential implementation, parallel is the same as sequential - return seq(std::move(tasks)...); - } - - template - auto par(lazy_unreleased_treeture&& ... 
tasks) { - return par(after(), std::move(tasks)...); - } - - - template - auto combine(dependencies&&, lazy_unreleased_treeture&& a, lazy_unreleased_treeture&& b, M&& m, bool = true) { - return make_lazy_unreleased_treeture([=]() { - return make_unreleased_treeture([=]() { - return make_treeture([=]() { - return m(a.get(),b.get()); - }); - }); - }); - } - - template - auto combine(lazy_unreleased_treeture&& a, lazy_unreleased_treeture&& b, M&& m, bool parallel = true) { - return sequential::combine(after(), std::move(a), std::move(b), std::move(m), parallel); - } - - -} // end namespace sequential -} // end namespace impl -} // end namespace core -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/core/io.h b/vendor/allscale/api/core/io.h deleted file mode 100644 index 90727ac38..000000000 --- a/vendor/allscale/api/core/io.h +++ /dev/null @@ -1,575 +0,0 @@ -#pragma once - -#include - -#include "allscale/api/core/impl/reference/io.h" -#include "allscale/utils/serializer.h" - -namespace allscale { -namespace api { -namespace core { - - - // ---------------------------------------------------------------------- - // Declarations - // ---------------------------------------------------------------------- - - - /** - * Supported IO modes for stream based operations. - * @see http://en.cppreference.com/w/cpp/io/c#Binary_and_text_modes - */ - enum class Mode { - Text, Binary - }; - - /** - * An abstraction for a file or buffer to read/write from. - */ - class Entry; - - /** - * An out-of-order stream for reading information from a file/buffer previously - * written using an output stream. - */ - class InputStream; - - /** - * An out-of-order stream for writing information to some file/buffer. - */ - class OutputStream; - - /** - * A utility for reading the content of a storage entity (e.g. a file) through - * memory mapped IO. 
- */ - class MemoryMappedInput; - - /** - * A utility for reading and writing the content of a storage entity (e.g. a file) through - * memory mapped IO. - */ - class MemoryMappedOutput; - - /** - * An IO manager for in-memory data buffer manipulations. - */ - class BufferIOManager; - - /** - * An IO manager providing access to the file system. - */ - class FileIOManager; - - - - // ---------------------------------------------------------------------- - // Definitions - // ---------------------------------------------------------------------- - - - // -- Stream Based IO --------------------------------------------------- - - /** - * A converter between this interface and the reference implementation - */ - inline impl::reference::Mode toRefMode(Mode mode) { - switch(mode) { - case Mode::Text: return impl::reference::Mode::Text; - case Mode::Binary: return impl::reference::Mode::Binary; - } - assert_fail() << "Invalid mode encountered!"; - return {}; - } - - class Entry { - - friend InputStream; - - friend OutputStream; - - friend MemoryMappedInput; - - friend MemoryMappedOutput; - - template - friend class IOManager; - - using RefEntry = impl::reference::Entry; - - // the wrapped up reference implementation - RefEntry entry; - - // the constructor is private to restrict creation to the corresponding factories - Entry(const RefEntry& entry) : entry(entry) {} - - }; - - - /** - * A stream to read data from some entry of an IO manager. - */ - class InputStream { - - template - friend class IOManager; - - using RefInStream = impl::reference::InputStream; - - // the wrapped up reference implementation - RefInStream& istream; - - InputStream(RefInStream& istream) - : istream(istream) {} - - public: - - InputStream(const InputStream&) = delete; - InputStream(InputStream&&) = default; - - /** - * Obtains the entry this stream is associated to. 
- */ - Entry getEntry() const { - return istream.getEntry(); - } - - /** - * Provides atomic access to this stream, allowing the given body to - * to perform a sequence of read operations without potential interference - * of other threads. - */ - template - InputStream& atomic(const Body& body) { - istream.atomic(body); - return *this; - } - - /** - * Reads a single instance of the given type (atomic). - */ - template - T read() { - return istream.read(); - } - - /** - * An idiomatic overload of the read operation. - */ - template - InputStream& operator>>(T& trg) { - istream >> trg; - return *this; - } - - /** - * Allows to test whether this stream is in a valid state. It can, for instance, - * be utilized to determine whether there has been an error during the last - * performed operation or whether in text mode the end of a file has been reached. - */ - operator bool() const { - return istream; - } - - // -- make it serializable -- - - static InputStream load(utils::ArchiveReader& a) { - return { RefInStream::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - istream.store(a); - } - }; - - - - /** - * A stream to write data to some entry of an IO manager. - */ - class OutputStream { - - template - friend class IOManager; - - using RefOutStream = impl::reference::OutputStream; - - RefOutStream& ostream; - - OutputStream(RefOutStream& ostream) - : ostream(ostream) {} - - public: - - OutputStream(const OutputStream&) = delete; - OutputStream(OutputStream&&) = default; - - /** - * Obtains the entry this stream is associated to. - */ - Entry getEntry() const { - return ostream.getEntry(); - } - - /** - * Provides atomic access to this stream, allowing the given body to - * to perform a sequence of write operations without potential interference - * of other threads. - */ - template - OutputStream& atomic(const Body& body) { - ostream.atomic(body); - return *this; - } - - /** - * Writes a single instance of the given type (atomic). 
- */ - template - OutputStream& write(const T& value) { - ostream.write(value); - return *this; - } - - /** - * An idiomatic overload of the write operation. - */ - template - OutputStream& operator<<(const T& value) { - ostream << value; - return *this; - } - OutputStream& operator<<(const char* value) { - ostream << value; - return *this; - } - - /** - * Allows to test whether this stream is in a valid state. It can, for instance, - * be utilized to determine whether there has been an error during the last - * performed operation. - */ - operator bool() const { - return ostream; - } - - // -- make it serializable -- - - static OutputStream load(utils::ArchiveReader& a) { - return { RefOutStream::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - ostream.store(a); - } - }; - - - - - - // -- Memory Mapped IO -------------------------------------------------- - - - /** - * A utility for reading the content of a storage entity (e.g. a file) through - * memory mapped IO. - */ - class MemoryMappedInput { - - template - friend class IOManager; - - using Impl = impl::reference::MemoryMappedInput; - - Impl impl; - - MemoryMappedInput(Impl&& impl) : impl(impl) {} - - public: - - /** - * The identifier for the underlying storage entity. - */ - Entry getEntry() const { - return impl.getEntry(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an instance of type T. - */ - template - const T& access() const { - return impl.access(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an array of instances of type T. - */ - template - const T* accessArray() const { - return &access(); - } - - // -- make it serializable -- - - static MemoryMappedInput load(utils::ArchiveReader& a) { - return { Impl::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - impl.store(a); - } - }; - - /** - * A utility for reading and writing the content of a storage entity (e.g. 
a file) through - * memory mapped IO. - */ - class MemoryMappedOutput { - - template - friend class IOManager; - - using Impl = impl::reference::MemoryMappedOutput; - - Impl impl; - - MemoryMappedOutput(Impl&& impl) : impl(impl) {} - - public: - - /** - * The identifier for the underlying storage entity. - */ - Entry getEntry() const { - return impl.getEntry(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an instance of type T. - */ - template - T& access() const { - return impl.access(); - } - - /** - * Provides access to the underlying data by interpreting it - * as an array of instances of type T. - */ - template - T* accessArray() const { - return &access(); - } - - // -- make it serializable -- - - static MemoryMappedOutput load(utils::ArchiveReader& a) { - return { Impl::load(a) }; - } - - void store(utils::ArchiveWriter& a) const { - impl.store(a); - } - }; - - - - // -- IO Manager -------------------------------------------------------- - - /** - * An IO manager, as the central dispatcher for IO operations. - */ - template - class IOManager { - - using Impl = impl::reference::IOManager; - - Impl impl; - - public: - - /** - * Creates a new entry with the given name in the underlying storage system. - * - * @param name the name of the entry (e.g. file) - * @param mode whether it is a binary or text file - * @return a entry ID referencing the newly created resource - */ - Entry createEntry(const std::string& name, Mode mode = Mode::Text) { - return impl.createEntry(name, toRefMode(mode)); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! 
- * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - InputStream openInputStream(Entry entry) { - return InputStream(impl.openInputStream(entry.entry)); - } - - /** - * Register a new output stream with the given name within the system. - * The call will create the underlying file and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the name of the stream to be opened -- nothing happens if already opened - */ - OutputStream openOutputStream(Entry entry) { - return OutputStream(impl.openOutputStream(entry.entry)); - } - - /** - * Register a new memory mapped input with the given name within the system. - * The call will load the underlying storage and prepare input operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedInput openMemoryMappedInput(Entry entry) { - return MemoryMappedInput(impl.openMemoryMappedInput(entry.entry)); - } - - /** - * Register a new memory mapped output with the given name within the system. - * The call will create the underlying storage and prepare output operations. - * - * NOTE: this method is not thread safe! - * - * @param entry the storage entry to be opened -- nothing happens if already opened - */ - MemoryMappedOutput openMemoryMappedOutput(Entry entry, std::size_t size) { - return MemoryMappedOutput(impl.openMemoryMappedOutput(entry.entry,size)); - } - - /** - * Obtains an input stream to read data from a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. 
- * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - InputStream getInputStream(Entry entry) { - return InputStream(impl.getInputStream(entry.entry)); - } - - /** - * Obtains an output stream to write data to a storage entry. - * The storage entry is maintained by the manager and the provided output stream - * is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a stream to append data to - */ - OutputStream getOutputStream(Entry entry) { - return OutputStream(impl.getOutputStream(entry.entry)); - } - - /** - * Obtains a memory mapped input to read data from a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * input is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped input - */ - MemoryMappedInput getMemoryMappedInput(Entry entry) { - return MemoryMappedInput(impl.getMemoryMappedInput(entry)); - } - - /** - * Obtains a memory mapped output to write data to a storage entry. - * The storage entry is maintained by the manager and the provided memory mapped - * output is only valid within the current thread. - * - * @param entry the name of the storage entry to be targeted -- must be open - * @return a requested memory mapped output - */ - MemoryMappedOutput getMemoryMappedOutput(Entry entry, std::size_t size) { - return MemoryMappedOutput(impl.getMemoryMappedOutput(entry,size)); - } - - /** - * Closes the given stream. - */ - void close(const InputStream& in) { - impl.close(in.istream); - } - - /** - * Closes the given stream. - */ - void close(const OutputStream& out) { - impl.close(out.ostream); - } - - /** - * Closes the given memory mapped entry. 
- */ - void close(const MemoryMappedInput& in) { - impl.close(in.impl); - } - - /** - * Closes the given memory mapped entry. - */ - void close(const MemoryMappedOutput& out) { - impl.close(out.impl); - } - - /** - * Determines whether the given entry exists. - */ - bool exists(Entry entry) const { - return impl.exists(entry.entry); - } - - /** - * Deletes the entry with the given name. - */ - void remove(Entry entry) { - impl.remove(entry.entry); - } - - }; - - // Definition of the BufferIOManager - class BufferIOManager : public IOManager { - - }; - - // Definition of the FileIOManager - class FileIOManager : public IOManager { - - /** - * Make constructor private to avoid instances. - */ - FileIOManager() {} - - public: - - /** - * Provide access to the singleton instance. - */ - static FileIOManager& getInstance() { - static FileIOManager mgr; - return mgr; - } - - }; - -} // end namespace core -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/core/prec.h b/vendor/allscale/api/core/prec.h deleted file mode 100644 index d4ce84c57..000000000 --- a/vendor/allscale/api/core/prec.h +++ /dev/null @@ -1,486 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "allscale/utils/functional_utils.h" -#include "allscale/utils/vector_utils.h" - -#include "allscale/api/core/treeture.h" - -namespace allscale { -namespace api { -namespace core { - - // ----- fun variants + utils ---------- - - template - class fun_variants : public std::tuple { - public: - explicit fun_variants(const Types&... args) : std::tuple(args...) { } - }; - - template - inline fun_variants make_fun_variants(const Types& ... 
elements) { - return fun_variants(elements...); - } - - template - struct is_fun_variants : public std::false_type {}; - - template - struct is_fun_variants> : public std::true_type {}; - - template - struct is_fun_variants : public is_fun_variants {}; - - template - struct is_fun_variants : public is_fun_variants {}; - - - - - namespace detail { - - template - struct result_wrapper { - - template - Out operator()(Fun&& fun) { - return fun(); - } - - }; - - template - struct result_wrapper,T> { - - template - completed_task operator()(Fun&& fun) { - return done(fun()); - } - - }; - - template<> - struct result_wrapper,void> { - - template - completed_task operator()(Fun&& fun) { - fun(); - return done(); - } - - }; - - template - struct result_wrapper,T> : public result_wrapper,T> {}; - - template - struct result_wrapper,T> : public result_wrapper,T> {}; - - template - struct result_wrapper,T> : public result_wrapper,T> {}; - - - struct call_first { - - template< - typename Res, - typename ... Versions, - typename ... Args - > - Res call(const fun_variants& versions, const Args& ... args) { - using res_type = decltype(std::get<0>(versions)(args...)); - result_wrapper wrap; - return wrap([&](){ return std::get<0>(versions)(args...); }); - } - - }; - - struct call_last { - - template< - typename Res, - typename ... Versions, - typename ... Args - > - Res call(const fun_variants& versions, const Args& ... args) { - using res_type = decltype(std::get(versions)(args...)); - result_wrapper wrap; - return wrap([&](){ return std::get(versions)(args...); }); - } - - }; - - } // end namespace detail - - - // ----- option handling handling ---------- - - - template - fun_variants pick(Options&& ... 
options) { - return make_fun_variants(std::move(options)...); - } - - - // ----- function handling ---------- - - template< - typename O, - typename I, - typename BaseCaseTest, - typename BaseCases, - typename StepCases - > - struct fun_def; - - template< - typename O, - typename I, - typename BaseCaseTest, - typename ... BaseCases, - typename ... StepCases - > - struct fun_def,fun_variants> { - typedef I in_type; - typedef O out_type; - - BaseCaseTest bc_test; - fun_variants base; - fun_variants step; - - fun_def( - const BaseCaseTest& test, - const fun_variants& base, - const fun_variants& step - ) : bc_test(test), base(base), step(step) {} - - fun_def(const fun_def& other) = default; - fun_def(fun_def&& other) = default; - - fun_def& operator=(const fun_def&) = delete; - fun_def& operator=(fun_def&&) = delete; - - template - impl::sequential::unreleased_treeture sequentialCall(impl::sequential::dependencies&& deps, const I& in, const Funs& ... funs) const { - // check for the base case, producing a value to be wrapped - if (bc_test(in)) { - return impl::sequential::spawn(std::move(deps),[&]{ return detail::call_first().template call(base, in); }); - } - - // run sequential step case producing an immediate value - return detail::call_last().template call>(step, in, funs.sequential_call()...); - } - - - template - impl::reference::unreleased_treeture parallelCall(impl::reference::dependencies&& deps, const I& in, const Funs& ... 
funs) const { - // check for the base case - const auto& base = this->base; - if (bc_test(in)) { - return impl::reference::spawn(std::move(deps), [=] { - return detail::call_first().template call(base, in); - }); - } - - // run step case - const auto& step = this->step; - return impl::reference::spawn( - // the dependencies of the new task - std::move(deps), - // the process version (sequential): - [=] { return detail::call_last().template call>(step, in, funs.sequential_call()...).get(); }, - // the split version (parallel): - [=] { return detail::call_first().template call>(step, in, funs.parallel_call()...); } - ); - } - - }; - - - - namespace detail { - - template - struct is_fun_def : public std::false_type {}; - - template - struct is_fun_def> : public std::true_type {}; - - template - struct is_fun_def : public is_fun_def {}; - - template - struct is_fun_def : public is_fun_def {}; - - template - struct is_fun_def : public is_fun_def {}; - - } - - template< - typename BT, typename First_BC, typename ... BC, typename ... 
SC, - typename O = typename utils::lambda_traits::result_type, - typename I = typename utils::lambda_traits::arg1_type - > - fun_def,fun_variants> - fun(const BT& a, const fun_variants& b, const fun_variants& c) { - return fun_def,fun_variants>(a,b,c); - } - - template< - typename BT, typename BC, typename SC, - typename filter = typename std::enable_if::value && !is_fun_variants::value,int>::type - > - auto fun(const BT& a, const BC& b, const SC& c) -> decltype(fun(a,make_fun_variants(b),make_fun_variants(c))) { - return fun(a,make_fun_variants(b),make_fun_variants(c)); - } - - template< - typename BT, typename BC, typename SC, - typename filter = typename std::enable_if::value && is_fun_variants::value,int>::type - > - auto fun(const BT& a, const BC& b, const SC& c) -> decltype(fun(a,make_fun_variants(b),c)) { - return fun(a,make_fun_variants(b),c); - } - - template< - typename BT, typename BC, typename SC, - typename filter = typename std::enable_if::value && !is_fun_variants::value,int>::type - > - auto fun(const BT& a, const BC& b, const SC& c) -> decltype(fun(a,b,make_fun_variants(c))) { - return fun(a,b,make_fun_variants(c)); - } - - - // --- recursive definitions --- - - template struct rec_defs; - - - namespace detail { - - - template< - unsigned i, - typename ... 
Defs - > - struct callable { - - using I = typename utils::type_at>::type::in_type; - using O = typename utils::type_at>::type::out_type; - - rec_defs defs; - - callable(const rec_defs& defs) : defs(defs) {}; - - struct SequentialCallable { - rec_defs defs; - - auto operator()(impl::sequential::dependencies&& deps, const I& in) const { - return impl::sequential::make_lazy_unreleased_treeture([=]() mutable { - return defs.template sequentialCall(std::move(deps),in); - }); - } - - auto operator()(const I& in) const { - return impl::sequential::make_lazy_unreleased_treeture([=](){ - return defs.template sequentialCall(impl::sequential::dependencies(),in); - }); - } - - }; - - auto sequential_call() const { - return SequentialCallable{defs}; - } - - - struct ParallelCallable { - rec_defs defs; - - template - auto operator()(impl::reference::dependencies&& deps, const I& in) const { - return defs.template parallelCall(std::move(deps),in); - } - - auto operator()(core::no_dependencies&&, const I& in) const { - return defs.template parallelCall(impl::reference::after(),in); - } - - auto operator()(const I& in) const { - return operator()(after(), in); - } - - }; - - auto parallel_call() const { - return ParallelCallable{defs}; - } - }; - - template< - unsigned i, - typename ... Defs - > - callable createCallable(const rec_defs& defs) { - return callable(defs); - } - - template - struct caller { - template - impl::sequential::unreleased_treeture sequentialCall(const F& f, impl::sequential::dependencies&& deps, const I& i, const D& d, const Args& ... args) const { - return caller().template sequentialCall(f,std::move(deps),i,d,createCallable(d),args...); - } - template - impl::reference::unreleased_treeture parallelCall(const F& f, impl::reference::dependencies&& deps, const I& i, const D& d, const Args& ... 
args) const { - return caller().template parallelCall(f,std::move(deps),i,d,createCallable(d),args...); - } - }; - - template<> - struct caller<0> { - template - auto sequentialCall(const F& f, impl::sequential::dependencies&& deps, const I& i, const D& d, const Args& ... args) const { - return f.sequentialCall(std::move(deps),i,createCallable<0>(d),args...); - } - template - impl::reference::unreleased_treeture parallelCall(const F& f, impl::reference::dependencies&& deps, const I& i, const D& d, const Args& ... args) const { - return f.template parallelCall(std::move(deps),i,createCallable<0>(d),args...); - } - }; - - - template - struct is_rec_def : public std::false_type {}; - - template - struct is_rec_def> : public std::true_type {}; - - template - struct is_rec_def : public is_rec_def {}; - - template - struct is_rec_def : public is_rec_def {}; - - template - struct is_rec_def : public is_rec_def {}; - - } - - - template - struct rec_defs : public std::tuple { - - template - rec_defs(const Args& ... args) : std::tuple(args...) {} - - rec_defs(const rec_defs&) = default; - rec_defs(rec_defs&&) = default; - - rec_defs& operator=(const rec_defs&) = delete; - rec_defs& operator=(rec_defs&&) = delete; - - template< - unsigned i, - typename O, - typename I - > - impl::sequential::unreleased_treeture sequentialCall(impl::sequential::dependencies&& deps, const I& in) const { - // call target function with a spawn - return detail::caller().template sequentialCall(std::get(*this),std::move(deps),in,*this); - } - - template< - bool root, - unsigned i, - typename O, - typename I, - typename DepsKind - > - impl::reference::unreleased_treeture parallelCall(impl::reference::dependencies&& deps, const I& in) const { - // call target function with a spawn - return detail::caller().template parallelCall(std::get(*this),std::move(deps),in,*this); - } - - }; - - - namespace detail { - - /** - * The struct forming the callable created by the prec operator. 
- */ - template< - unsigned i, - typename I, - typename O, - typename ... Defs - > - struct prec_operation { - - rec_defs defs; - - template - treeture operator()(impl::reference::dependencies&& deps, const I& in) { - return defs.template parallelCall(std::move(deps),in); - } - - treeture operator()(core::no_dependencies&&, const I& in) { - return defs.template parallelCall(impl::reference::after(),in); - } - - treeture operator()(const I& in) { - return (*this)(after(),in); - } - }; - - - } - - - template< - typename ... Defs - > - rec_defs group(const Defs& ... defs) { - return rec_defs(defs...); - } - - - // --- prec operator --- - - template< - unsigned i = 0, - typename ... Defs, - typename I = typename utils::type_at>::type::in_type, - typename O = typename utils::type_at>::type::out_type - > - auto prec(const rec_defs& defs) { - return detail::prec_operation{defs}; - } - - template< - unsigned i = 0, - typename First, - typename ... Rest, - typename dummy = typename std::enable_if::value,int>::type - > - auto prec(const First& f, const Rest& ... r) { - return prec(group(f,r...)); - } - - template< - typename BT, typename BC, typename SC, - typename dummy = typename std::enable_if::value,int>::type - > - auto prec(const BT& t, const BC& b, const SC& s) { - return prec<0>(group(fun(t,b,s))); - } - -} // end namespace core -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/core/treeture.h b/vendor/allscale/api/core/treeture.h deleted file mode 100644 index bfac6da2d..000000000 --- a/vendor/allscale/api/core/treeture.h +++ /dev/null @@ -1,430 +0,0 @@ -#pragma once - -#include - -/** - * This header file formalizes the general, public interface of treetures, independent - * of any actual implementation. - * - * TODO: extend on this here ... 
- */ - -#include "allscale/api/core/impl/sequential/treeture.h" -#include "allscale/api/core/impl/reference/treeture.h" - -namespace allscale { -namespace api { -namespace core { - - - // -------------------------------------------------------------------------------------------- - // Treetures - // -------------------------------------------------------------------------------------------- - - - /** - * The actual treeture, referencing the computation of a value. - */ - template - using treeture = impl::reference::treeture; - - /** - * A reference to a sub-task, to create - */ - using task_reference = impl::reference::task_reference; - - - // --------------------------------------------------------------------------------------------- - // Auxiliary Construct - // --------------------------------------------------------------------------------------------- - - - namespace detail { - - template - struct completed_task { - - using value_type = T; - - T value; - - operator impl::sequential::unreleased_treeture() { - return impl::sequential::done(value); - } - - operator impl::reference::unreleased_treeture() { - return impl::reference::done(value); - } - - operator impl::sequential::treeture() { - return impl::sequential::done(value); - } - - operator impl::reference::treeture() { - return impl::reference::done(value); - } - - T get() { - return value; - } - - }; - - template<> - struct completed_task { - - using value_type = void; - - operator impl::sequential::unreleased_treeture() { - return impl::sequential::done(); - } - - operator impl::reference::unreleased_treeture() { - return impl::reference::done(); - } - - operator impl::sequential::treeture() { - return impl::sequential::done(); - } - - operator impl::reference::treeture() { - return impl::reference::done(); - } - - void get() { - } - - }; - - } - - - // --------------------------------------------------------------------------------------------- - // Operators - // 
--------------------------------------------------------------------------------------------- - - // --- dependencies --- - - class no_dependencies { - - public: - - operator impl::sequential::dependencies() const { - return impl::sequential::after(); - } - - operator impl::reference::dependencies>() const { - return impl::reference::after(); - } - - operator impl::reference::dependencies() const { - return impl::reference::after(std::vector()); - } - - }; - - // --- utility to identify dependencies --- - - template - struct is_dependency : public std::false_type {}; - - template<> - struct is_dependency : public std::true_type {}; - - template<> - struct is_dependency : public std::true_type {}; - - template - struct is_dependency> : public std::true_type {}; - - - // -- no dependencies -- - - inline auto after() { - return no_dependencies(); - } - - - // -- sequential -- - - template - auto after(const impl::sequential::task_reference& first, const Rest& ... rest) { - return impl::sequential::after(first, rest...); - } - - - // -- reference -- - - template - auto after(const impl::reference::task_reference& first, const Rest& ... 
rest) { - return impl::reference::after(first, rest...); - } - - - // --- releasing tasks --- - - template - inline impl::sequential::treeture run(impl::sequential::unreleased_treeture&& treeture) { - return std::move(treeture).release(); - } - - template - inline impl::sequential::treeture run(impl::sequential::lazy_unreleased_treeture&& treeture) { - return std::move(treeture).release(); - } - - template - inline impl::reference::treeture run(impl::reference::unreleased_treeture&& treeture) { - return std::move(treeture).release(); - } - - - // --- completed tasks --- - - inline detail::completed_task done() { - return detail::completed_task(); - } - - template - detail::completed_task done(const T& value) { - return detail::completed_task{value}; - } - - - // --- control flow --- - - - namespace detail { - - /** - * Different implementations utilized by this reference implementation. - */ - - struct DoneImpl { - - template - auto convertParameter(completed_task&& a) const { - return std::move(a); - } - - template - auto sequential(completed_task&&,completed_task&&) { - return done(); - } - - template - auto sequential(const D&, completed_task&&,completed_task&&) { - return done(); - } - - template - auto parallel(completed_task&&,completed_task&&) { - return done(); - } - - template - auto parallel(const D&, completed_task&&,completed_task&&) { - return done(); - } - - template - auto combine(completed_task&& a, completed_task&& b, M&& m, bool) { - return done(m(a.get(),b.get())); - } - - template - auto combine(const D&, completed_task&& a, completed_task&& b, M&& m, bool) { - return done(m(a.get(),b.get())); - } - - }; - - struct SequentialImpl { - - template - auto convertParameter(completed_task&& a) const { - return impl::sequential::done(a.get()); - } - - template - auto convertParameter(impl::sequential::lazy_unreleased_treeture&& a) const { - return std::move(a); - } - - template - auto sequential(impl::sequential::lazy_unreleased_treeture&& a, 
impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::seq(std::move(a),std::move(b)); - } - - template - auto sequential(impl::sequential::dependencies&& deps, impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::seq(std::move(deps),std::move(a),std::move(b)); - } - - template - auto parallel(impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::par(std::move(a),std::move(b)); - } - - template - auto parallel(impl::sequential::dependencies&& deps, impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b) { - return impl::sequential::par(std::move(deps),std::move(a),std::move(b)); - } - - template - auto combine(impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b, M&& m, bool parallel) { - return impl::sequential::combine(std::move(a),std::move(b),std::move(m), parallel); - } - - template - auto combine(impl::sequential::dependencies&& deps, impl::sequential::lazy_unreleased_treeture&& a, impl::sequential::lazy_unreleased_treeture&& b, M&& m, bool parallel) { - return impl::sequential::combine(std::move(deps),std::move(a),std::move(b),std::move(m), parallel); - } - }; - - struct ReferenceImpl { - - template - auto convertParameter(completed_task&& a) const { - return impl::reference::done(a.get()); - } - - template - auto convertParameter(impl::reference::unreleased_treeture&& a) const { - return std::move(a); - } - - template - auto sequential(impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::seq(std::move(a),std::move(b)); - } - - template - auto sequential(impl::reference::dependencies&& deps, impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::seq(std::move(deps),std::move(a),std::move(b)); - } - - 
template - auto parallel(impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::par(std::move(a),std::move(b)); - } - - template - auto parallel(impl::reference::dependencies&& deps, impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b) { - return impl::reference::par(std::move(deps),std::move(a),std::move(b)); - } - - template - auto combine(impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b, M&& m, bool parallel) { - return impl::reference::combine(std::move(a),std::move(b),std::move(m), parallel); - } - - template - auto combine(impl::reference::dependencies&& deps, impl::reference::unreleased_treeture&& a, impl::reference::unreleased_treeture&& b, M&& m, bool parallel) { - return impl::reference::combine(std::move(deps),std::move(a),std::move(b),std::move(m), parallel); - } - }; - - - /** - * A mapping of parameter combinations to implementations: - * - * done, done -> done - * - * seq , seq -> seq - * seq , done -> seq - * done, seq -> seq - * - * ref , ref -> ref - * ref , done -> ref - * done, ref -> ref - * - * others are illegal - */ - - template - struct implementation; - - template - struct implementation,completed_task> : public DoneImpl {}; - - template - struct implementation,impl::sequential::lazy_unreleased_treeture> : public SequentialImpl {}; - - template - struct implementation,completed_task> : public SequentialImpl {}; - - template - struct implementation,impl::sequential::lazy_unreleased_treeture> : public SequentialImpl {}; - - template - struct implementation,impl::reference::unreleased_treeture> : public ReferenceImpl {}; - - template - struct implementation,completed_task> : public ReferenceImpl {}; - - template - struct implementation,impl::reference::unreleased_treeture> : public ReferenceImpl {}; - - } - - - // -- sequential -- - - template::value,int>::type = 1> - auto sequential(D&& deps, A&& a, B&& b) { - 
detail::implementation impl; - return impl.sequential(std::move(deps),impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto sequential(A&& a, B&& b) { - detail::implementation impl; - return impl.sequential(impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto sequential(A&& a, B&& b, Rest&& ... rest) { - return sequential(sequential(std::move(a),std::move(b)),std::move(rest)...); - } - - - // -- parallel -- - - template::value,int>::type = 1> - auto parallel(D&& deps, A&& a, B&& b) { - detail::implementation impl; - return impl.parallel(std::move(deps),impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto parallel(A&& a, B&& b) { - detail::implementation impl; - return impl.parallel(impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b))); - } - - template - auto parallel(A&& a, B&& b, Rest&& ... rest) { - return parallel(parallel(std::move(a),std::move(b)),std::move(rest)...); - } - - // --- aggregation --- - - - template - auto combine(D&& deps, A&& a, B&& b, M&& m, bool parallel = true) { - detail::implementation impl; - return impl.combine(std::move(deps),impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b)), std::move(m), parallel); - } - - template - auto combine(A&& a, B&& b, M&& m, bool parallel = true) { - detail::implementation impl; - return impl.combine(impl.convertParameter(std::move(a)),impl.convertParameter(std::move(b)), std::move(m), parallel); - } - - -} // end namespace core -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/user/algorithm/async.h b/vendor/allscale/api/user/algorithm/async.h deleted file mode 100644 index 51d1c502a..000000000 --- a/vendor/allscale/api/user/algorithm/async.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" - -#include "allscale/api/core/prec.h" - -namespace allscale { 
-namespace api { -namespace user { -namespace algorithm { - - - - // --------------------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------------------- - - - /** - * A simple job wrapper processing a given task asynchronously. The task - * is wrapped to a simple recursion where there is a single base - * case step. - * - * @tparam Action the type of action - * @param action the action to be processed - * @return a treeture providing a reference the the result - */ - template - core::treeture> async(const Action& action); - - - /** - * A simple job wrapper processing a given task asynchronously after the - * given dependencies are satisfied. The task is wrapped to a simple recursion - * where there is a single base case step. - * - * @tparam Dependencies the dependencies to await - * @tparam Action the type of action - * @param action the action to be processed - * @return a treeture providing a reference the the result - */ - template - core::treeture> async(Dependencies&& deps, const Action& action); - - - - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - template - core::treeture> async(const Action& action) { - return async(core::after(), action); - } - - - template - core::treeture> async(Dependencies&& deps, const Action& action) { - struct empty {}; - return core::prec( - [](empty){ return true; }, - [=](empty){ - return action(); - }, - [=](empty,const auto&){ - assert_fail() << "Should not be reached!"; - return action(); - } - )(std::move(deps), empty()); - } - - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/internal/operation_reference.h 
b/vendor/allscale/api/user/algorithm/internal/operation_reference.h deleted file mode 100644 index 1e754f534..000000000 --- a/vendor/allscale/api/user/algorithm/internal/operation_reference.h +++ /dev/null @@ -1,114 +0,0 @@ -#include - -#include "allscale/api/core/treeture.h" - -#include "allscale/utils/assert.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { -namespace internal { - - - /** - * An operation reference is an (optional) base implementation - * of the return values of asynchronous operations. Unlike plain - * treetures, operator references are waiting for their tasks - * to be completed before destruction. - */ - class operation_reference { - - /** - * The treeture wrapped by this references, which corresponds - * to the root task of the asynchronously started task. - */ - core::treeture handle; - - public: - - /** - * A simple constructor taking 'ownership' on the given treeture. - */ - operation_reference(core::treeture&& handle) - : handle(std::move(handle)) {} - - /** - * A simple constructor taking 'ownership' on the given completed task. - */ - operation_reference(core::detail::completed_task&&) - : handle() {} - - /** - * A default constructor, not owning or syncing on anything. - */ - operation_reference() {}; - - /** - * Operation references may not be copied. - */ - operation_reference(const operation_reference&) = delete; - - /** - * Operation references may be moved. - */ - operation_reference(operation_reference&&) = default; - - /** - * Operation references may not be copied. - */ - operation_reference& operator=(const operation_reference&) = delete; - - /** - * Operation references may be moved. - */ - operation_reference& operator=(operation_reference&&) = default; - - /** - * Upon destruction, the references is waiting on the underlying - * task if it is still owned. 
- */ - ~operation_reference() { - // if handle is still valid, wait for its completion - if (handle.isValid()) handle.wait(); - } - - /** - * A non-blocking check whether the referenced operation is done. - */ - bool isDone() const { - return handle.isDone(); - } - - /** - * Determines whether a task is attached to this reference. - */ - bool isValid() const { - return handle.isValid(); - } - - /** - * Disconnects the referenced task, causing this reference no longer - * to wait on the given task upon destruction. - * - * @return returns the maintained task handle - */ - core::treeture detach() { - return std::move(handle); - } - - /** - * Blocks until the underlying operation has been completed. - */ - void wait() { - handle.wait(); - } - - }; - - -} // end namespace internal -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/pfor.h b/vendor/allscale/api/user/algorithm/pfor.h deleted file mode 100644 index 881f676b2..000000000 --- a/vendor/allscale/api/user/algorithm/pfor.h +++ /dev/null @@ -1,1758 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" - -#include "allscale/api/core/prec.h" - -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - // ----- forward declarations ------ - - namespace detail { - - /** - * The object representing the iterator range of a (parallel) loop. - */ - template - class range; - - - // -- Adaptive Loop Dependencies -- - - /** - * The token produced by the pfor operator to reference the execution - * of a parallel loop. - */ - template - class loop_reference; - - /** - * A marker type for loop dependencies. - */ - struct loop_dependency {}; - - /** - * A test for loop dependencies. - */ - template - struct is_loop_dependency : public std::is_base_of {}; - - /** - * A small container for splitting dependencies. 
- */ - template - struct SubDependencies { - Dependency left; - Dependency right; - }; - - } // end namespace detail - - /** - * The dependency to be used if no dependencies are required. - */ - struct no_dependencies : public detail::loop_dependency { - - detail::SubDependencies split() const { - return detail::SubDependencies(); - } - - }; - - // --------------------------------------------------------------------------------------------- - // Basic Generic pfor Operators - // --------------------------------------------------------------------------------------------- - - /** - * The generic version of all parallel loops with synchronization dependencies. - * - * @tparam Iter the type of the iterator to pass over - * @tparam Body the type of the body operation, thus the operation to be applied on each element in the given range - * @tparam Dependency the type of the dependencies to be enforced - * - * @param r the range to iterate over - * @param body the operation to be applied on each element of the given range - * @param dependency the dependencies to be obeyed when scheduling the iterations of this parallel loop - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pfor(const detail::range& r, const Body& body, const Dependency& dependency); - - /** - * The generic version of all parallel loops without synchronization dependencies. 
- * - * @tparam Iter the type of the iterator to pass over - * @tparam Body the type of the body operation, thus the operation to be applied on each element in the given range - * - * @param r the range to iterate over - * @param body the operation to be applied on each element of the given range - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pfor(const detail::range& r, const Body& body, const no_dependencies& = no_dependencies()); - - - // --------------------------------------------------------------------------------------------- - // pfor Operators with Boundaries - // --------------------------------------------------------------------------------------------- - - /** - * The generic version of all parallel loops with synchronization dependencies. - * - * @tparam Iter the type of the iterator to pass over - * @tparam InnerBody the type of the inner body operation, thus the operation to be applied on each element in the given range that is not on the surface - * @tparam BoundaryBody the type of the boundary body operation, thus the operation to be applied on each element in the given range that is on the surface - * @tparam Dependency the type of the dependencies to be enforced - * - * @param r the range to iterate over - * @param innerBody the operation to be applied on each element of the given range that is not on the surface - * @param boundaryBody the operation to be applied on each element of the given range that is on the surface - * @param dependency the dependencies to be obeyed when scheduling the iterations of this parallel loop - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency); - - /** - * The generic 
version of all parallel loops without synchronization dependencies. - * - * @tparam Iter the type of the iterator to pass over - * @tparam InnerBody the type of the inner body operation, thus the operation to be applied on each element in the given range that is not on the surface - * @tparam BoundaryBody the type of the boundary body operation, thus the operation to be applied on each element in the given range that is on the surface - * - * @param r the range to iterate over - * @param innerBody the operation to be applied on each element of the given range that is not on the surface - * @param boundaryBody the operation to be applied on each element of the given range that is on the surface - * - * @return a reference to the iterations of the processed parallel loop to be utilized for forming dependencies - */ - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const no_dependencies& = no_dependencies()); - - - // --------------------------------------------------------------------------------------------- - // The after Utility - // --------------------------------------------------------------------------------------------- - - /** - * A generic utility for inserting a single action into a single a chain of dependencies. The given action will be triggered - * once the corresponding iteration in the given loop reference has been completed. The resulting loop reference can be utilized - * by consecutive operations to synchronize on the completion of the concatenation of the given loop reference and inserted action. 
- * - * @tparam Iter the type of iterator the preceding loop operated on - * @tparam Point the iterator value of the point this action shell be associated to - * @tparam Action the type of action to be performed - * - * @param loop preceding loop - * @param point the point to which this event shell be associated to - * @param action the action to be performed - * @return a customized loop reference to sync upon the concatenation of this - */ - template - detail::loop_reference after(const detail::loop_reference& loop, const Point& point, const Action& action); - - - // --------------------------------------------------------------------------------------------- - // adapters for the pfor operator - // --------------------------------------------------------------------------------------------- - - template - detail::loop_reference> pfor(const std::array& a, const std::array& b, const Body& body) { - return pfor(detail::range>(a,b),body); - } - - template - detail::loop_reference> pfor(const std::array& a, const std::array& b, const Body& body, const Dependency& dependency) { - return pfor(detail::range>(a,b),body,dependency); - } - - template - detail::loop_reference> pforWithBoundary(const std::array& a, const std::array& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody) { - return pforWithBoundary(detail::range>(a,b),innerBody,boundaryBody); - } - - template - detail::loop_reference> pforWithBoundary(const std::array& a, const std::array& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency) { - return pforWithBoundary(detail::range>(a,b),innerBody,boundaryBody,dependency); - } - - /** - * A parallel for-each implementation iterating over the given range of elements. 
- */ - template - detail::loop_reference pfor(const Iter& a, const Iter& b, const Body& body, const Dependency& dependency) { - return pfor(detail::range(a,b),body,dependency); - } - - template - detail::loop_reference pfor(const Iter& a, const Iter& b, const Body& body) { - return pfor(a,b,body,no_dependencies()); - } - - template - detail::loop_reference pforWithBoundary(const Iter& a, const Iter& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody) { - return pforWithBoundary(detail::range(a,b),innerBody,boundaryBody); - } - - template - detail::loop_reference pforWithBoundary(const Iter& a, const Iter& b, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency) { - return pforWithBoundary(detail::range(a,b),innerBody,boundaryBody,dependency); - } - - // ---- container support ---- - - /** - * A parallel for-each implementation iterating over the elements of the given, mutable container. - */ - template - detail::loop_reference - pfor(Container& c, const Op& op) { - return pfor(c.begin(), c.end(), op); - } - - /** - * A parallel for-each implementation iterating over the elements of the given, mutable container. - */ - template - std::enable_if_t::value,detail::loop_reference> - pfor(Container& c, const Op& op, const Dependency& dependency) { - return pfor(c.begin(), c.end(), op, dependency); - } - - - /** - * A parallel for-each implementation iterating over the elements of the given container. - */ - template - detail::loop_reference - pfor(const Container& c, const Op& op) { - return pfor(c.begin(), c.end(), op); - } - - /** - * A parallel for-each implementation iterating over the elements of the given container. 
- */ - template - detail::loop_reference - pfor(const Container& c, const Op& op, const Dependency& dependency) { - return pfor(c.begin(), c.end(), op, dependency); - } - - - // ---- Vector support ---- - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vectors. - */ - template - detail::loop_reference> pfor(const utils::Vector& a, const utils::Vector& b, const Body& body) { - return pfor(detail::range>(a,b),body); - } - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vectors. Optional dependencies may be passed. - */ - template - detail::loop_reference> pfor(const utils::Vector& a, const utils::Vector& b, const Body& body, const Dependencies& dependencies) { - return pfor(detail::range>(a,b),body,dependencies); - } - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vector. - */ - template - auto pfor(const utils::Vector& a, const Body& body) { - return pfor(utils::Vector(0),a,body); - } - - /** - * A parallel for-each implementation iterating over the elements of the points covered by - * the hyper-box limited by the given vector. Optional dependencies may be passed. - */ - template - auto pfor(const utils::Vector& a, const Body& body, const Dependencies& dependencies) { - return pfor(utils::Vector(0),a,body,dependencies); - } - - // ------------------------------------------------------------------------------------------- - // Adaptive Synchronization - // ------------------------------------------------------------------------------------------- - - - /** - * A dependency forming the conjunction of a list of given dependencies. - */ - template - class conjunction_sync_dependency; - - /** - * A factory for a conjunction of dependencies. 
- */ - template - conjunction_sync_dependency sync_all(const Dependencies& ... dependencies) { - return conjunction_sync_dependency(dependencies...); - } - - /** - * A dependency actually representing no dependency. Could be used as a place-holder. - */ - class no_dependency; - - /** - * A factory for no synchronization dependencies. Could be used as a place-holder. - */ - no_dependency no_sync(); - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon as iteration i of a given parallel loop has been completed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class one_on_one_dependency; - - /** - * A factory for one_on_one dependencies. - */ - template - one_on_one_dependency one_on_one(const detail::loop_reference& dep) { - return one_on_one_dependency(dep); - } - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon as iterations { i + c | c \in {-1,0,1}^n && |c| <= 1 } of a given parallel loop has been completed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class small_neighborhood_sync_dependency; - - /** - * A factory for small neighborhood sync dependencies. - */ - template - small_neighborhood_sync_dependency small_neighborhood_sync(const detail::loop_reference& dep) { - return small_neighborhood_sync_dependency(dep); - } - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon as iterations { i + c | c \in {-1,0,1}^n } of a given parallel loop has been completed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class full_neighborhood_sync_dependency; - - /** - * A factory for full neighborhood sync dependencies. 
- */ - template - full_neighborhood_sync_dependency full_neighborhood_sync(const detail::loop_reference& dep) { - return full_neighborhood_sync_dependency(dep); - } - - /** - * A dependency between loop iterations where iteration i of a new parallel loop may be executed - * as soon the entire range of a given loop has been executed. - * - * @param Iter the iterator type utilized to address iterations - */ - template - class after_all_sync_dependency; - - /** - * A factory for after-all sync dependencies. - */ - template - after_all_sync_dependency after_all_sync(const detail::loop_reference& dep) { - return after_all_sync_dependency(dep); - } - - - // ------------------------------------------------------------------------------------------- - // Range Utils - // ------------------------------------------------------------------------------------------- - - - namespace detail { - - // -- obtain number of dimensions of an iterator -- - - template - struct dimensions { - enum { value = 1 }; - }; - - template - struct dimensions> { - enum { value = D }; - }; - - template - struct dimensions> { - enum { value = D }; - }; - - // -- distances between begin and end of iterators -- - - template - struct volume { - size_t operator()(const Iter& a, const Iter& b) const { - return std::distance(a,b); - } - }; - - template - struct volume::value,bool>> { - size_t operator()(Int a, Int b) const { - return (a < b) ? 
b-a : 0; - } - }; - - template - struct volume> { - size_t operator()(const std::array& a, const std::array& b) const { - volume inner; - size_t res = 1; - for(size_t i = 0; i - struct volume> { - size_t operator()(const utils::Vector& a, const utils::Vector& b) const { - return volume>()(a,b); - } - }; - - // -- minimum distance between elements along individual dimensions -- - - template - struct min_dimension_length { - size_t operator()(const Iter& a, const Iter& b) const { - return std::distance(a,b); - } - }; - - template - struct min_dimension_length::value,bool>> { - size_t operator()(Int a, Int b) const { - return (a < b) ? b-a : 0; - } - }; - - template - struct min_dimension_length> { - size_t operator()(const std::array& a, const std::array& b) const { - min_dimension_length inner; - size_t res = std::numeric_limits::max(); - for(size_t i = 0; i - struct min_dimension_length> { - size_t operator()(const utils::Vector& a, const utils::Vector& b) const { - return min_dimension_length>()(a,b); - } - }; - - template - size_t getMinimumDimensionLength(const range& r) { - return min_dimension_length()(r.begin(),r.end()); - } - - // -- coverage -- - - template - bool covers(const Iter& a_begin, const Iter& a_end, const Iter& b_begin, const Iter& b_end) { - return b_begin >= b_end || (a_begin <= b_begin && b_end <= a_end); - } - - template - bool covers(const utils::Vector& a_begin, const utils::Vector& a_end, const utils::Vector& b_begin, const utils::Vector& b_end) { - // if the second is empty, it is covered - for(size_t i=0; i= b_end[i]) return true; - } - // check that a non-empty range is covered - for(size_t i=0; i - bool covers(const Iter& begin, const Iter& end, const Point& p) { - return begin <= p && p < end; - } - - template - bool covers(const utils::Vector& begin, const utils::Vector& end, const utils::Vector& point) { - for(size_t i=0; i - auto access(const Iter& iter) -> decltype(*iter) { - return *iter; - } - - template - typename 
std::enable_if::value,T>::type access(T a) { - return a; - } - - - // -- scan utility -- - - template - void forEach(const Iter& fullBegin, const Iter& fullEnd, const Iter& a, const Iter& b, const InnerOp& inner, const BoundaryOp& boundary) { - - // cut off empty loop - if (a == b) return; - - // get inner range - Iter innerBegin = a; - Iter innerEnd = b; - - // check for boundaries - if (fullBegin == a) { - boundary(access(a)); - innerBegin++; - } - - // reduce inner range if b is the end - if (fullEnd == b) { - innerEnd--; - } - - // process inner part - for(auto it = innerBegin; it != innerEnd; ++it) { - inner(access(it)); - } - - // process left boundary - if(fullEnd == b) { - boundary(access(b-1)); - } - } - - - template - void forEach(const Iter& a, const Iter& b, const InnerOp& inner, const BoundaryOp& boundary) { - - // cut off empty loop - if (a == b) return; - - // process left boundary - boundary(access(a)); - if (a + 1 == b) return; - - // process inner part - for(auto it = a+1; it != b-1; ++it) { - inner(access(it)); - } - - // process left boundary - boundary(access(b-1)); - } - - template - void forEach(const Iter& a, const Iter& b, const Op& op) { - for(auto it = a; it != b; ++it) { - op(access(it)); - } - } - - template - struct point_factory; - - template - struct point_factory> { - template - std::array operator()(Coordinates ... coordinates) { - return { { coordinates ... } }; - } - }; - - template - struct point_factory> { - template - utils::Vector operator()(Coordinates ... coordinates) { - return utils::Vector(coordinates...); - } - }; - - - template - struct scanner { - scanner nested; - template class Compound, typename Iter, size_t dims, typename Op, typename ... Coordinates> - void operator()(const Compound& begin, const Compound& end, const Op& op, Coordinates ... 
coordinates) { - auto a = begin[dims-idx]; - auto b = end[dims-idx]; - for(Iter i = a; i != b ; ++i) { - nested(begin,end,op,coordinates...,i); - } - } - }; - - template<> - struct scanner<0> { - template class Compound, typename Iter, size_t dims, typename Op, typename ... Coordinates> - void operator()(const Compound&, const Compound&, const Op& op, Coordinates ... coordinates) { - point_factory> factory; - op(factory(coordinates...)); - } - }; - - template - struct scanner_with_boundary { - scanner_with_boundary nested; - template class Compound, typename Iter, size_t dims, typename Op> - void operator()(const Compound& begin, const Compound& end, Compound& cur, const Op& op) { - auto& i = cur[dims-idx]; - for(i = begin[dims-idx]; i != end[dims-idx]; ++i ) { - nested(begin, end, cur, op); - } - } - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound& begin, const Compound& end, Compound& cur, const Inner& inner, const Boundary& boundary) { - auto& i = cur[dims-idx]; - - // extract range - const auto& a = begin[dims-idx]; - const auto& b = end[dims-idx]; - - // check empty range - if (a==b) return; - - // handle left boundary - i = a; nested(begin,end,cur,boundary); - - // check whether this has been all - if (a + 1 == b) return; - - // process inner part - for(i = a+1; i!=b-1; ++i) { - nested(begin,end,cur,inner,boundary); - } - - // handle right boundary - i = b-1; - nested(begin,end,cur,boundary); - } - - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound& fullBegin, const Compound& fullEnd, const Compound& begin, const Compound& end, Compound& cur, const Inner& inner, const Boundary& boundary) { - auto& i = cur[dims-idx]; - - // extract range - const auto& fa = fullBegin[dims-idx]; - const auto& fb = fullEnd[dims-idx]; - - const auto& a = begin[dims-idx]; - const auto& b = end[dims-idx]; - - // check empty range - if 
(a==b) return; - - // get inner range - auto ia = a; - auto ib = b; - - // handle left boundary - if (fa == ia) { - i = ia; - nested(begin,end,cur,boundary); - ia++; - } - - if (fb == b) { - ib--; - } - - // process inner part - for(i = ia; i!=ib; ++i) { - nested(fullBegin,fullEnd,begin,end,cur,inner,boundary); - } - - // handle right boundary - if (fb == b) { - i = b-1; - nested(begin,end,cur,boundary); - } - } - }; - - template<> - struct scanner_with_boundary<0> { - template class Compound, typename Iter, size_t dims, typename Op> - void operator()(const Compound&, const Compound&, Compound& cur, const Op& op) { - op(cur); - } - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound&, const Compound&, Compound& cur, const Inner& inner, const Boundary&) { - inner(cur); - } - template class Compound, typename Iter, size_t dims, typename Inner, typename Boundary> - void operator()(const Compound&, const Compound&, const Compound&, const Compound&, Compound& cur, const Inner& inner, const Boundary&) { - inner(cur); - } - }; - - template - void forEach(const std::array& fullBegin, const std::array& fullEnd, const std::array& begin, const std::array& end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - std::array cur; - - // scan range - detail::scanner_with_boundary()(fullBegin, fullEnd, begin, end, cur, inner, boundary); - } - - template - void forEach(const std::array& begin, const std::array& end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - std::array cur; - - // scan range - detail::scanner_with_boundary()(begin, end, cur, inner, boundary); - } - - template - void forEach(const std::array& begin, const std::array& end, const Op& op) { - // scan range - detail::scanner()(begin, end, op); - } - - template - void forEach(const utils::Vector& fullBegin, const utils::Vector& fullEnd, const utils::Vector& begin, const utils::Vector& 
end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - utils::Vector cur; - - // scan range - detail::scanner_with_boundary()(fullBegin, fullEnd, begin, end, cur, inner, boundary); - } - - template - void forEach(const utils::Vector& begin, const utils::Vector& end, const InnerOp& inner, const BoundaryOp& boundary) { - - // the current position - utils::Vector cur; - - // scan range - detail::scanner_with_boundary()(begin, end, cur, inner, boundary); - } - - - template - void forEach(const utils::Vector& begin, const utils::Vector& end, const Op& op) { - // scan range - detail::scanner()(begin, end, op); - } - - - template - Iter grow(const Iter& value, const Iter& limit, int steps) { - return std::min(limit, value+steps); - } - - template - std::array grow(const std::array& value, const std::array& limit, int steps) { - std::array res; - for(unsigned i=0; i - utils::Vector grow(const utils::Vector& value, const utils::Vector& limit, int steps) { - utils::Vector res; - for(unsigned i=0; i - Iter shrink(const Iter& value, const Iter& limit, int steps) { - return std::max(limit, value-steps); - } - - template - std::array shrink(const std::array& value, const std::array& limit, int steps) { - std::array res; - for(unsigned i=0; i - utils::Vector shrink(const utils::Vector& value, const utils::Vector& limit, int steps) { - utils::Vector res; - for(unsigned i=0; i - struct fragments { - range left; - range right; - }; - - template - fragments make_fragments(const range& left, const range& right) { - return fragments{ left, right }; - } - - template - struct range_spliter; - - /** - * The object representing the iterator range of a (parallel) loop. - */ - template - class range { - - /** - * The begin of this range (inclusive). - */ - Iter _begin; - - /** - * The end of this range (exclusive). 
- */ - Iter _end; - - public: - - range() : _begin(), _end() {} - - range(const Iter& begin, const Iter& end) - : _begin(begin), _end(end) { - if (empty()) { _end = _begin; } - } - - size_t size() const { - return detail::volume()(_begin,_end); - } - - bool empty() const { - return size() == 0; - } - - const Iter& begin() const { - return _begin; - } - - const Iter& end() const { - return _end; - } - - bool covers(const range& r) const { - return detail::covers(_begin,_end,r._begin,r._end); - } - - template - bool covers(const Point& p) const { - return detail::covers(_begin,_end,p); - } - - range grow(const range& limit, int steps = 1) const { - return range( - detail::shrink(_begin,limit.begin(),steps), - detail::grow(_end,limit.end(),steps) - ); - } - - range shrink(int steps = 1) const { - return grow(*this, -steps); - } - - fragments split(std::size_t depth) const { - return range_spliter::split(depth,*this); - } - - template - void forEach(const Op& op) const { - detail::forEach(_begin,_end,op); - } - - template - void forEachWithBoundary(const range& full, const InnerOp& inner, const BoundaryOp& boundary) const { - detail::forEach(full._begin,full._end,_begin,_end,inner,boundary); - } - - friend std::ostream& operator<<(std::ostream& out, const range& r) { - return out << "[" << r.begin() << "," << r.end() << ")"; - } - - }; - - template - struct range_spliter { - - using rng = range; - - static fragments split(std::size_t, const rng& r) { - const auto& a = r.begin(); - const auto& b = r.end(); - auto m = a + (b - a)/2; - return make_fragments(rng(a,m),rng(m,b)); - } - - static std::size_t getSplitDimension(std::size_t) { - return 0; - } - }; - - template< - template class Container, - typename Iter, size_t dims - > - struct range_spliter> { - - using rng = range>; - - static fragments> split(std::size_t depth, const rng& r) { - - __allscale_unused const auto volume = detail::volume>(); - - // get split dimension - auto splitDim = getSplitDimension(depth); - 
- // compute range fragments - const auto& begin = r.begin(); - const auto& end = r.end(); - - // split the longest dimension, keep the others as they are - auto midA = end; - auto midB = begin; - midA[splitDim] = midB[splitDim] = range_spliter::split(depth,range(begin[splitDim],end[splitDim])).left.end(); - - // make sure no points got lost - assert_eq(volume(begin,end), volume(begin,midA) + volume(midB,end)); - - // create result - return make_fragments(rng(begin,midA),rng(midB,end)); - } - - static std::size_t getSplitDimension(std::size_t depth) { - return depth % dims; - } - - }; - - } // end namespace detail - - - - // ------------------------------------------------------------------------------------------- - // Synchronization Definitions - // ------------------------------------------------------------------------------------------- - - namespace detail { - - /** - * An entity to reference ranges of iterations of a loop. - */ - template - class iteration_reference { - - /** - * The range covered by the iterations referenced by this object. - */ - range _range; - - /** - * The reference to the task processing the covered range. - */ - core::task_reference handle; - - /** - * The recursive depth of the referenced iteration range. 
- */ - std::size_t depth; - - public: - - iteration_reference(const range& range, const core::task_reference& handle, std::size_t depth) - : _range(range), handle(handle), depth(depth) {} - - iteration_reference(const range& _range = range()) : _range(_range), depth(0) {} - - iteration_reference(const iteration_reference&) = default; - iteration_reference(iteration_reference&&) = default; - - iteration_reference& operator=(const iteration_reference&) = default; - iteration_reference& operator=(iteration_reference&&) = default; - - void wait() const { - if (handle.valid()) handle.wait(); - } - - iteration_reference getLeft() const { - return { range_spliter::split(depth,_range).left, handle.getLeft(), depth+1 }; - } - - iteration_reference getRight() const { - return { range_spliter::split(depth,_range).right, handle.getRight(), depth+1 }; - } - - operator core::task_reference() const { - return handle; - } - - const range& getRange() const { - return _range; - } - - const core::task_reference& getHandle() const { - return handle; - } - - std::size_t getDepth() const { - return depth; - } - }; - - - /** - * An entity to reference the full range of iterations of a loop. This token - * can not be copied and will wait for the completion of the loop upon destruction. 
- */ - template - class loop_reference : public iteration_reference { - - public: - - loop_reference(const range& range, core::treeture&& handle) - : iteration_reference(range, std::move(handle), 0) {} - - loop_reference() {}; - loop_reference(const loop_reference&) = delete; - loop_reference(loop_reference&&) = default; - - loop_reference& operator=(const loop_reference&) = delete; - loop_reference& operator=(loop_reference&&) = default; - - ~loop_reference() { this->wait(); } - - }; - - } // end namespace detail - - - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - template - detail::loop_reference pfor(const detail::range& r, const Body& body, const Dependency& dependency) { - - struct RecArgs { - std::size_t depth; - detail::range range; - Dependency dependencies; - }; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& rg) { - // if there is only one element left, we reached the base case - return rg.range.size() <= 1; - }, - [body](const RecArgs& rg) { - // apply the body operation to every element in the remaining range - rg.range.forEach(body); - }, - core::pick( - [](const RecArgs& rg, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = rg.range.split(rg.depth); - auto& left = fragments.left; - auto& right = fragments.right; - auto dep = rg.dependencies.split(left,right); - return core::parallel( - nested(dep.left.toCoreDependencies(), RecArgs{rg.depth+1, left, dep.left} ), - nested(dep.right.toCoreDependencies(), RecArgs{rg.depth+1, right,dep.right}) - ); - }, - [body](const RecArgs& rg, const auto&) { - // the alternative is processing the step sequentially - rg.range.forEach(body); - } - ) - )(dependency.toCoreDependencies(),RecArgs{0,r,dependency}) }; - } - - template - 
detail::loop_reference pfor(const detail::range& r, const Body& body, const no_dependencies&) { - - struct RecArgs { - std::size_t depth; - detail::range range; - }; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& r) { - // if there is only one element left, we reached the base case - return r.range.size() <= 1; - }, - [body](const RecArgs& r) { - // apply the body operation to every element in the remaining range - r.range.forEach(body); - }, - core::pick( - [](const RecArgs& r, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = r.range.split(r.depth); - return core::parallel( - nested(RecArgs{r.depth+1,fragments.left}), - nested(RecArgs{r.depth+1,fragments.right}) - ); - }, - [body](const RecArgs& r, const auto&) { - // the alternative is processing the step sequentially - r.range.forEach(body); - } - ) - )(RecArgs{0,r}) }; - } - - class no_dependency : public detail::loop_dependency { - - public: - - auto toCoreDependencies() const { - return core::after(); - } - - template - detail::SubDependencies split(const Range&, const Range&) const { - // split dependencies, which is actually nothing to do ... - return { no_dependency(), no_dependency() }; - - } - - friend std::ostream& operator<< (std::ostream& out, const no_dependency&) { - return out << "none"; - } - - }; - - inline no_dependency no_sync() { - return no_dependency(); - } - - // -------------------------------------------------------------------------------------------------------- - - template - class conjunction_sync_dependency : public detail::loop_dependency { - - using nested_type = conjunction_sync_dependency; - - First first; - - nested_type nested; - - conjunction_sync_dependency(const First& first, const nested_type& nested) - : first(first), nested(nested) {} - - public: - - conjunction_sync_dependency(const First& first, const Rest& ... rest) - : first(first), nested(rest...) 
{} - - auto toCoreDependencies() const { - return concat(first.toCoreDependencies(),nested.toCoreDependencies()); - } - - template - detail::SubDependencies split(const detail::range& left, const detail::range& right) const { - - // get fragments - auto firstFragments = first.split(left,right); - auto nestedFragments = nested.split(left,right); - - // create resulting dependencies - return { - { firstFragments.left, nestedFragments.left }, - { firstFragments.right, nestedFragments.right } - }; - - } - - friend std::ostream& operator<< (std::ostream& out, const conjunction_sync_dependency& dep) { - return out << dep.first << " && " << dep.nested; - } - - }; - - // special case for a conjunction of a single dependency - this is just that dependency - template - class conjunction_sync_dependency : public Dependency { - public: - conjunction_sync_dependency(const Dependency& dep) : Dependency(dep) {} - }; - - // special case for an empty conjunction - this is no dependency - template<> - class conjunction_sync_dependency<> : public no_dependency { - public: - conjunction_sync_dependency() : no_dependency() {} - conjunction_sync_dependency(const no_dependency& dep) : no_dependency(dep) {} - }; - - // -------------------------------------------------------------------------------------------------------- - - template - class one_on_one_dependency : public detail::loop_dependency { - - detail::iteration_reference loop; - - public: - - one_on_one_dependency(const detail::iteration_reference& loop) - : loop(loop) {} - - auto getCenterRange() const { - return loop.getRange(); - } - - core::impl::reference::dependencies> toCoreDependencies() const { - return core::after(loop.getHandle()); - } - - detail::SubDependencies> split(const detail::range& left, const detail::range& right) const { - - // get left and right loop fragments - auto loopLeft = loop.getLeft(); - auto loopRight = loop.getRight(); - - // split dependencies, thereby checking range coverage - return { - // we 
take the sub-task if it covers the targeted range, otherwise we stick to the current range - loopLeft.getRange().covers(left) ? one_on_one_dependency{loopLeft} : *this, - loopRight.getRange().covers(right) ? one_on_one_dependency{loopRight} : *this - }; - - } - - friend std::ostream& operator<< (std::ostream& out, const one_on_one_dependency& dep) { - return out << dep.loop.getRange(); - } - - }; - - - template - class small_neighborhood_sync_dependency : public detail::loop_dependency { - - // determine the number of dimensions - enum { num_dimensions = detail::dimensions::value }; - - // the type of iteration dependency - using iteration_reference = detail::iteration_reference; - - // on each dimension, two dependencies are stored in each direction - struct deps_pair { - iteration_reference left; - iteration_reference right; - }; - - // save two dependencies for each dimension - using deps_list = std::array; - - // on dependency covering the central area - iteration_reference center; - - // the neighboring dependencies - deps_list neighborhood; - - // and internal constructor required by the split operation - small_neighborhood_sync_dependency() {} - - public: - - small_neighborhood_sync_dependency(const iteration_reference& loop) - : center(loop), neighborhood() {} - - const detail::range& getCenterRange() const { - return center.getRange(); - } - - std::vector> getRanges() const { - std::vector> res; - res.push_back(center.getRange()); - for(std::size_t i=0; i - core::impl::reference::dependencies> toCoreDependencies(const std::index_sequence&) const { - return core::after( - center, - neighborhood[Dims].left ..., - neighborhood[Dims].right ... 
- ); - } - - public: - - core::impl::reference::dependencies> toCoreDependencies() const { - return toCoreDependencies(std::make_index_sequence()); - } - - detail::SubDependencies> split(const detail::range& left, const detail::range& right) const { - - using splitter = detail::range_spliter; - - // create new left and right dependencies - small_neighborhood_sync_dependency res_left; - small_neighborhood_sync_dependency res_right; - - // update center - res_left.center = center.getLeft(); - res_right.center = center.getRight(); - - // update neighbors except split dimension - bool save_left = true; - bool save_right = true; - auto splitDim = splitter::getSplitDimension(center.getDepth()); - for(std::size_t i =0; i - struct full_dependency_block { - - using iteration_reference = detail::iteration_reference; - - using nested = full_dependency_block; - - enum { num_dependencies = nested::num_dependencies * 3 }; - - std::array dependencies; - - void setCenter(const iteration_reference& ref) { - dependencies[1].setCenter(ref); - } - - const iteration_reference& getCenter() const { - return dependencies[1].getCenter(); - } - - template - void forEach(const Op& op) const { - dependencies[0].forEach(op); - dependencies[1].forEach(op); - dependencies[2].forEach(op); - } - - core::impl::reference::dependencies> toCoreDependencies() const { - return produceCoreDependencies(*this); - } - - template - static auto produceCoreDependencies(const Blocks& ... 
blocks) { - return nested::template produceCoreDependencies(blocks.dependencies[0]...,blocks.dependencies[1]...,blocks.dependencies[2]...); - } - - full_dependency_block narrowLeft(bool& save, std::size_t splitDimension, std::size_t radius) const { - full_dependency_block res; - if (Dims - 1 == splitDimension) { - res.dependencies[0] = dependencies[0].narrowRight(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowLeft(save,splitDimension, radius); - res.dependencies[2] = dependencies[1].narrowRight(save,splitDimension, radius); - } else { - res.dependencies[0] = dependencies[0].narrowLeft(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowLeft(save,splitDimension, radius); - res.dependencies[2] = dependencies[2].narrowLeft(save,splitDimension, radius); - } - return res; - } - - full_dependency_block narrowRight(bool& save, std::size_t splitDimension, std::size_t radius) const { - full_dependency_block res; - if (Dims - 1 == splitDimension) { - res.dependencies[0] = dependencies[1].narrowLeft(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowRight(save,splitDimension, radius); - res.dependencies[2] = dependencies[2].narrowLeft(save,splitDimension, radius); - } else { - res.dependencies[0] = dependencies[0].narrowRight(save,splitDimension, radius); - res.dependencies[1] = dependencies[1].narrowRight(save,splitDimension, radius); - res.dependencies[2] = dependencies[2].narrowRight(save,splitDimension, radius); - } - return res; - } - }; - - template - struct full_dependency_block { - - using iteration_reference = detail::iteration_reference; - - enum { num_dependencies = 1 }; - - iteration_reference dependency; - - void setCenter(const iteration_reference& ref) { - dependency = ref; - } - - const iteration_reference& getCenter() const { - return dependency; - } - - template - void forEach(const Op& op) const { - op(dependency); - } - - core::impl::reference::dependencies> toCoreDependencies() 
const { - return core::after(dependency); - } - - template - static auto produceCoreDependencies(const Blocks& ... blocks) { - return core::after(blocks.dependency...); - } - - full_dependency_block narrowLeft(bool& save, std::size_t, std::size_t radius) const { - full_dependency_block res; - res.dependency = dependency.getLeft(); - if (!dependency.getRange().empty() && getMinimumDimensionLength(res.dependency.getRange()) < radius) save = false; - return res; - } - - full_dependency_block narrowRight(bool& save, std::size_t, std::size_t radius) const { - full_dependency_block res; - res.dependency = dependency.getRight(); - if (!dependency.getRange().empty() && getMinimumDimensionLength(res.dependency.getRange()) < radius) save = false; - return res; - } - }; - - } - - template - class full_neighborhood_sync_dependency : public detail::loop_dependency { - - enum { num_dimensions = detail::dimensions::value }; - - using deps_block = detail::full_dependency_block; - - deps_block deps; - - full_neighborhood_sync_dependency(const deps_block& deps) : deps(deps) {} - - public: - - full_neighborhood_sync_dependency(const detail::iteration_reference& loop) : deps() { - deps.setCenter(loop); - } - - const detail::range& getCenterRange() const { - return deps.getCenter().getRange(); - } - - std::vector> getRanges() const { - std::vector> res; - deps.forEach([&](const auto& dep) { - if (!dep.getRange().empty()) res.push_back(dep.getRange()); - }); - return res; - } - - auto toCoreDependencies() const { - return deps.toCoreDependencies(); - } - - detail::SubDependencies> split(const detail::range& left, const detail::range& right) const { - using splitter = detail::range_spliter; - - auto splitDim = splitter::getSplitDimension(deps.getCenter().getDepth()); - - // prepare safety flag - bool save_left = true; - bool save_right = true; - - // compute left and right sub-dependencies - full_neighborhood_sync_dependency res_left(deps.narrowLeft(save_left,splitDim,radius)); - 
full_neighborhood_sync_dependency res_right(deps.narrowRight(save_right,splitDim,radius)); - - // check coverage and build up result - return { - save_left && res_left.getCenterRange().covers(left) ? res_left : *this, - save_right && res_right.getCenterRange().covers(right) ? res_right : *this - }; - } - - friend std::ostream& operator<< (std::ostream& out, const full_neighborhood_sync_dependency& dep) { - return out << "[" << utils::join(",", dep.getRanges()) << "]"; - } - - }; - - - template - class after_all_sync_dependency : public detail::loop_dependency { - - // the type of iteration dependency - using iteration_reference = detail::iteration_reference; - - iteration_reference dependency; - - public: - - after_all_sync_dependency(const detail::iteration_reference& loop) - : dependency(loop) {} - - const detail::range& getCenterRange() const { - return dependency.getRange(); - } - - std::vector> getRanges() const { - std::vector> res; - res.push_back(dependency.getRange()); - return res; - } - - auto toCoreDependencies() const { - return core::after(dependency); - } - - detail::SubDependencies> split(const detail::range&, const detail::range&) const { - // this dependency never changes - return { *this, *this }; - } - - friend std::ostream& operator<< (std::ostream& out, const after_all_sync_dependency& dep) { - return out << "[" << dep.getCenterRange() << "]"; - } - - }; - - - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const Dependency& dependency) { - - struct RecArgs { - std::size_t depth; - detail::range range; - Dependency dependencies; - }; - - // keep a copy of the full range - auto full = r; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& rg) { - // if there is only one element left, we reached the base case - return rg.range.size() <= 1; - }, - [innerBody,boundaryBody,full](const RecArgs& rg) { - // apply the body operation to 
every element in the remaining range - rg.range.forEachWithBoundary(full,innerBody,boundaryBody); - }, - core::pick( - [](const RecArgs& rg, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = rg.range.split(rg.depth); - auto& left = fragments.left; - auto& right = fragments.right; - auto dep = rg.dependencies.split(left,right); - return core::parallel( - nested(dep.left.toCoreDependencies(), RecArgs{rg.depth+1,left, dep.left} ), - nested(dep.right.toCoreDependencies(), RecArgs{rg.depth+1,right,dep.right}) - ); - }, - [innerBody,boundaryBody,full](const RecArgs& rg, const auto&) { - // the alternative is processing the step sequentially - rg.range.forEachWithBoundary(full,innerBody,boundaryBody); - } - ) - )(dependency.toCoreDependencies(),RecArgs{0,r,dependency}) }; - } - - template - detail::loop_reference pforWithBoundary(const detail::range& r, const InnerBody& innerBody, const BoundaryBody& boundaryBody, const no_dependencies&) { - - struct RecArgs { - std::size_t depth; - detail::range range; - }; - - // keep a copy of the full range - auto full = r; - - // trigger parallel processing - return { r, core::prec( - [](const RecArgs& r) { - // if there is only one element left, we reached the base case - return r.range.size() <= 1; - }, - [innerBody,boundaryBody,full](const RecArgs& r) { - // apply the body operation to every element in the remaining range - r.range.forEachWithBoundary(full,innerBody,boundaryBody); - }, - core::pick( - [](const RecArgs& r, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = r.range.split(r.depth); - auto& left = fragments.left; - auto& right = fragments.right; - return core::parallel( - nested(RecArgs{ r.depth+1, left }), - nested(RecArgs{ r.depth+1, right }) - ); - }, - [innerBody,boundaryBody,full](const RecArgs& r, const auto&) { - // the alternative is processing the step sequentially - 
r.range.forEachWithBoundary(full,innerBody,boundaryBody); - } - ) - )(RecArgs{ 0 , r }) }; - } - - - - - template - detail::loop_reference after(const detail::loop_reference& loop, const Point& point, const Action& action) { - - // get the full range - auto r = loop.getRange(); - - struct RecArgs { - std::size_t depth; - detail::range range; - one_on_one_dependency dependencies; - }; - - // get the initial dependency - auto dependency = one_on_one(loop); - - // trigger parallel processing - return { r, core::prec( - [point](const RecArgs& rg) { - // check whether the point of action is covered by the current range - return !rg.range.covers(point); - }, - [action,point](const RecArgs& rg) { - // trigger the action if the current range covers the point - if (rg.range.covers(point)) action(); - - }, - core::pick( - [](const RecArgs& rg, const auto& nested) { - // in the step case we split the range and process sub-ranges recursively - auto fragments = rg.range.split(rg.depth); - auto& left = fragments.left; - auto& right = fragments.right; - auto dep = rg.dependencies.split(left,right); - return core::parallel( - nested(dep.left.toCoreDependencies(), RecArgs{rg.depth+1, left, dep.left} ), - nested(dep.right.toCoreDependencies(), RecArgs{rg.depth+1, right,dep.right}) - ); - }, - [action,point](const RecArgs& rg, const auto&) { - // trigger the action if the current range covers the point - if (rg.range.covers(point)) action(); - } - ) - )(dependency.toCoreDependencies(),RecArgs{0,r,dependency}) }; - } - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/preduce.h b/vendor/allscale/api/user/algorithm/preduce.h deleted file mode 100644 index a7e8616a5..000000000 --- a/vendor/allscale/api/user/algorithm/preduce.h +++ /dev/null @@ -1,205 +0,0 @@ -#pragma once - -#include - -#include "allscale/api/core/prec.h" - -#include "allscale/api/user/algorithm/pfor.h" - -#include 
"allscale/utils/assert.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - - // ----- fold / reduce ------ - - /** - * The most generic implementation of the reduction operator. All other - * reductions are reduced to this implementation. - * - * @param a the begin of a range of elements to be reduced - * @param b the end (exclusive) of a range of elements to be reduced - * @param reduce the operation capable of performing a reduction over a subrange - * @param aggregate the operation capable of performing a reduction over a subrange - */ - template< - typename Iter, - typename RangeReductionOp, - typename AggregationOp - > - core::treeture::result_type> - preduce( - const Iter& a, - const Iter& b, - const RangeReductionOp& reduce, - const AggregationOp& aggregate - ) { - - using res_type = typename utils::lambda_traits::result_type; - - // define the argument struct - struct RecArgs { - std::size_t depth; - algorithm::detail::range range; - }; - - return core::prec( - [](const RecArgs& r) { - return r.range.size() <= 1; - }, - [reduce](const RecArgs& r)->res_type { - return reduce(r.range.begin(),r.range.end()); - }, - core::pick( - [aggregate](const RecArgs& r, const auto& nested) { - // here we have the binary splitting - auto fragments = r.range.split(r.depth); - auto left = fragments.left; - auto right = fragments.right; - return core::combine(nested(RecArgs{ r.depth+1, left }),nested(RecArgs{ r.depth+1, right }),aggregate); - }, - [reduce](const RecArgs& r, const auto&)->res_type { - return reduce(r.range.begin(),r.range.end()); - } - ) - )(RecArgs{ 0, algorithm::detail::range(a, b) }); - } - - - - /** - * A variant of the preduce operator where the range based reduction step - * is assembled from a set of utilities to create, update, and reduce a local temporary value. 
- */ - template< - typename Iter, - typename FoldOp, - typename ReduceOp, - typename InitLocalState, - typename FinishLocalState - > - core::treeture::result_type> - preduce( - const Iter& a, - const Iter& b, - const FoldOp& fold, - const ReduceOp& reduce, - const InitLocalState& init, - const FinishLocalState& finish - ) { - - return preduce( - a, b, [init,fold,finish](const Iter& a, const Iter& b) { - auto res = init(); - algorithm::detail::range(a,b).forEach([&](const auto& cur){ - fold(cur,res); - }); - return finish(res); - }, - reduce - ); - - } - - // ----- reduction ------ - - template - core::treeture::result_type> - preduce(const Iter& a, const Iter& b, const Op& op) { - using res_type = typename utils::lambda_traits::result_type; - - return preduce( - a,b, - [op](const res_type& cur, res_type& res) { - res = op(cur,res); - }, - op, - [](){ return res_type(); }, - [](const res_type& r) { return r; } - ); - - } - - /** - * A parallel reduce implementation over the elements of the given container. - */ - template - core::treeture::result_type> - preduce(Container& c, Op& op) { - return preduce(c.begin(), c.end(), op); - } - - /** - * A parallel reduce implementation over the elements of the given container. 
- */ - template - core::treeture::result_type> - preduce(const Container& c, const Op& op) { - return preduce(c.begin(), c.end(), op); - } - - - template< - typename Iter, - typename MapOp, - typename ReduceOp, - typename InitLocalState - > - core::treeture::result_type> - preduce( - const Iter& a, - const Iter& b, - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init - ) { - - return preduce(a, b, map, reduce, init, ([](typename utils::lambda_traits::result_type r) { return r; } )); - } - - template< - typename Container, - typename MapOp, - typename ReduceOp, - typename InitLocalState, - typename ReduceLocalState - > - core::treeture::result_type> - preduce( - const Container& c, - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init, - const ReduceLocalState& exit - ) { - - return preduce(c.begin(), c.end(), map, reduce, init, exit); - - } - - template< - typename Container, - typename MapOp, - typename ReduceOp, - typename InitLocalState - > - core::treeture::result_type> - preduce( - const Container& c, - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init - ) { - - return preduce(c.begin(), c.end(), map, reduce, init); - - } - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/stencil.h b/vendor/allscale/api/user/algorithm/stencil.h deleted file mode 100644 index 6677cc99a..000000000 --- a/vendor/allscale/api/user/algorithm/stencil.h +++ /dev/null @@ -1,1474 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "allscale/api/user/data/grid.h" -#include "allscale/api/user/data/static_grid.h" - -#include "allscale/api/user/algorithm/pfor.h" -#include "allscale/api/user/algorithm/async.h" -#include "allscale/api/user/algorithm/internal/operation_reference.h" - -#include "allscale/utils/bitmanipulation.h" -#include "allscale/utils/unused.h" -#include "allscale/utils/vector.h" - 
-namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - - // --------------------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------------------- - - - template - using Coordinate = utils::Vector; - - template - using Size = Coordinate; - - namespace implementation { - - struct sequential_iterative; - - struct coarse_grained_iterative; - - struct fine_grained_iterative; - - struct sequential_recursive; - - struct parallel_recursive; - - } - - template - class Observer; - - template - struct is_observer; - - template - class stencil_reference; - - template< - typename Impl = implementation::fine_grained_iterative, typename Container, typename InnerUpdate, typename BoundaryUpdate, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - std::enable_if_t::value,stencil_reference> stencil( - Container& res, std::size_t steps, const InnerUpdate& innerUpdate, const BoundaryUpdate& boundaryUpdate, - const Observer& ... observers - ); - - template< - typename Impl = implementation::fine_grained_iterative, typename Container, typename Update, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - stencil_reference stencil( - Container& res, std::size_t steps, const Update& update, - const Observer& ... 
observers - ); - - template - Observer observer(const TimeStampFilter& timeFilter, const LocationFilter& locationFilter, const Action& action); - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - template - class stencil_reference : public internal::operation_reference { - - public: - - // inherit all constructors - using operation_reference::operation_reference; - - }; - - template< - typename Impl, typename Container, typename InnerUpdate, typename BoundaryUpdate, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - std::enable_if_t::value,stencil_reference> stencil( - Container& a, std::size_t steps, const InnerUpdate& innerUpdate, const BoundaryUpdate& boundaryUpdate, - const Observer& ... observers - ) { - - // forward everything to the implementation - return Impl().process(a,steps,innerUpdate,boundaryUpdate,observers...); - - } - - template< - typename Impl, typename Container, typename Update, - typename ... ObserverTimeFilters, typename ... ObserverLocationFilters, typename ... ObserverActions - > - stencil_reference stencil(Container& a, std::size_t steps, const Update& update,const Observer& ... 
observers) { - - // use the same update for inner and boundary updates - return stencil(a,steps,update,update,observers...); - - } - - template - class Observer { - public: - TimeStampFilter isInterestedInTime; - LocationFilter isInterestedInLocation; - Action trigger; - - Observer(const TimeStampFilter& timeFilter, const LocationFilter& locationFilter, const Action& action) - : isInterestedInTime(timeFilter), isInterestedInLocation(locationFilter), trigger(action) {} - }; - - template - struct is_observer : public std::false_type {}; - - template - struct is_observer> : public std::true_type {}; - - template - Observer observer(const TimeStampFilter& timeFilter, const LocationFilter& locationFilter, const Action& action) { - return Observer(timeFilter,locationFilter,action); - } - - namespace implementation { - - namespace detail { - - template - void staticForEach(const Op&) { - // nothing to do - } - - template - void staticForEach(const Op& op, const First& first, const Rest& ... rest) { - op(first); - staticForEach(op,rest...); - } - - } - - - // -- Iterative Stencil Implementation --------------------------------------------------------- - - struct sequential_iterative { - - template - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& innerUpdate, const BoundaryUpdate& boundaryUpdate, const Observers& ... observers) { - - // return handle to asynchronous execution - return async([&a,steps,innerUpdate,boundaryUpdate,observers...]{ - - // iterative implementation - Container b(a.size()); - - Container* x = &a; - Container* y = &b; - - using iter_type = decltype(a.size()); - - for(std::size_t t=0; t - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... 
observers) { - - // return handle to asynchronous execution - return async([&a,steps,inner,boundary,observers...]{ - - // iterative implementation - Container b(a.size()); - - Container* x = &a; - Container* y = &b; - - using iter_type = decltype(a.size()); - - for(std::size_t t=0; t - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... observers) { - - // return handle to asynchronous execution - return async([&a,steps,inner,boundary,observers...]{ - - // iterative implementation - Container b(a.size()); - - Container* x = &a; - Container* y = &b; - - using iter_type = decltype(a.size()); - - user::algorithm::detail::loop_reference ref; - - for(std::size_t t=0; t::element_type; - using time_type = std::size_t; - - - template - using Slopes = utils::Vector; - - template - class Base { - public: - - struct range { - index_type begin; - index_type end; - }; - - std::array boundaries; - - static Base zero() { - return full(0); - } - - static Base full(std::size_t size) { - static_assert(dims == 1, "This constructor only supports 1-d bases."); - Base res; - res.boundaries[0] = { 0, (index_type)size }; - return res; - } - - template - static Base full(const utils::Vector& size) { - Base res; - for(std::size_t i=0; i= cur.end) return 0; - res *= (cur.end - cur.begin); - } - return res; - } - - Coordinate extend() const { - Coordinate res; - for(std::size_t i = 0; i& other) const { - Base res; - for(std::size_t i=0; i - struct plain_scanner { - - plain_scanner nested; - - template - void operator()(const Base& base, const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& observer, Coordinate& pos, std::size_t t, const Coordinate& size) const { - constexpr const auto idx = full_dim - dim - 1; - - // compute boundaries - auto from = base[idx].begin; - auto to = base[idx].end; - auto length = size[idx]; - - // shift range to size window - if (from > length) { - from -= 
length; - to -= length; - } - - // process range from start to limit - auto limit = std::min(to,length); - processRange(base,inner,boundary,observer,pos,t,size,from,limit); - - // and if necessary the elements beyond, after a wrap-around - if (to <= length) return; - - to -= length; - processRange(base,inner,boundary,observer,pos,t,size,0,to); - } - - template - void processRange(const Base& base, const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& observer, Coordinate& pos, std::size_t t, const Coordinate& size, std::int64_t from, std::int64_t to) const { - constexpr const auto idx = full_dim - dim - 1; - - // skip an empty range - if (from >= to) return; - - // get inner range - auto innerFrom = from; - auto innerTo = to; - - // check left boundary - if (innerFrom == 0) { - - // process left as a boundary - pos[idx] = 0; - nested(base,boundary,boundary,observer,pos,t,size); - - // skip this one from the inner part - innerFrom++; - } - - // check right boundary - if (innerTo == size[idx]) { - innerTo--; - } - - // process inner part - for(pos[idx]=innerFrom; pos[idx] - struct plain_scanner<0> { - - template - void operator()(const Base& base, const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& observer, Coordinate& pos, std::size_t t, const Coordinate& size) const { - constexpr const auto idx = full_dim - 1; - - // compute boundaries - auto from = base[idx].begin; - auto to = base[idx].end; - auto length = size[idx]; - - // shift range to size window - if (from > length) { - from -= length; - to -= length; - } - - // process range from start to limit - auto limit = std::min(to,length); - processRange(inner,boundary,observer,pos,t,size,from,limit); - - // and if necessary the elements beyond, after a wrap-around - if (to <= length) return; - - to -= length; - processRange(inner,boundary,observer,pos,t,size,0,to); - } - - template - void processRange(const InnerBody& inner, const BoundaryBody& boundary, const ObserverBody& 
observer, Coordinate& pos, std::size_t t, const Coordinate& size, std::int64_t from, std::int64_t to) const { - constexpr const auto idx = full_dim - 1; - - // skip an empty range - if (from >= to) return; - - // get inner range - auto innerFrom = from; - auto innerTo = to; - - // check left boundary - if (innerFrom == 0) { - - // process left as a boundary - pos[idx] = 0; - boundary(pos,t); - - // skip this one from the inner part - innerFrom++; - } - - // check right boundary - if (innerTo == size[idx]) { - innerTo--; - } - - // process inner part - for(pos[idx]=innerFrom; pos[idx] - class TaskDependencyList { - - core::task_reference dep; - - TaskDependencyList nested; - - public: - - TaskDependencyList() {} - - template - TaskDependencyList(const core::task_reference& first, const Rest& ... rest) - : dep(first), nested(rest...) {} - - - // support conversion into core dependencies - auto toCoreDependencies() const { - return nested.toCoreDependencies(dep); - } - - template - auto toCoreDependencies(const Deps& ... deps) const { - return nested.toCoreDependencies(dep,deps...); - } - - }; - - template<> - class TaskDependencyList<0> { - - public: - - TaskDependencyList() {} - - // support conversion into core dependencies - auto toCoreDependencies() const { - return core::after(); - } - - template - auto toCoreDependencies(const Deps& ... 
deps) const { - return core::after(deps...); - } - - }; - - - template - class ZoidDependencies : public TaskDependencyList<3*dims> { - - using super = TaskDependencyList<3*dims>; - - public: - - // TODO: support dependency refinement - - // inherit constructors - using super::super; - - }; - - - template - class Zoid { - - Base base; // the projection of the zoid to the space dimensions - - Slopes slopes; // the direction of the slopes - - time_type t_begin; // the start time - time_type t_end; // the end time - - public: - - Zoid() {} - - Zoid(const Base& base, const Slopes& slopes, std::size_t t_begin, std::size_t t_end) - : base(base), slopes(slopes), t_begin(t_begin), t_end(t_end) {} - - - template - void forEach(const EvenOp& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserverOp& evenObserver, const OddObserverOp& oddObserver, const Size& limits) const { - - // TODO: make this one cache oblivious - - // create the plain scanner - plain_scanner scanner; - - Coordinate x; - auto plainBase = base; - - // over the time - for(std::size_t t = t_begin; t < t_end; ++t) { - - // process this plain - if ( t & 0x1 ) { - scanner(plainBase, odd, oddBoundary, oddObserver, x, t, limits); - } else { - scanner(plainBase, even, evenBoundary, evenObserver, x, t, limits); - } - - // update the plain for the next level - for(std::size_t i=0; i - core::treeture pforEach(const ZoidDependencies& deps, const EvenOd& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserverOp& evenObserver, const OddObserverOp& oddObserver, const Size& limits) const { - - struct Params { - Zoid zoid; - ZoidDependencies deps; - }; - - // recursively decompose the covered space-time volume - return core::prec( - [](const Params& params) { - // check whether this zoid can no longer be divided - return params.zoid.isTerminal(); - }, - [&](const Params& params) { - // process final steps 
sequentially - params.zoid.forEach(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - }, - core::pick( - [](const Params& params, const auto& rec) { - // unpack parameters - const auto& zoid = params.zoid; - const auto& deps = params.deps; - - // make sure the zoid is not terminal - assert_false(zoid.isTerminal()); - - // check whether it can be split in space - if (!zoid.isSpaceSplitable()) { - // we need a time split - auto parts = zoid.splitTime(); - return core::sequential( - rec(deps.toCoreDependencies(),Params{parts.bottom,deps}), - rec(deps.toCoreDependencies(),Params{parts.top,deps}) - ); - } - - // let's do a space split - auto parts = zoid.splitSpace(); - - // schedule depending on the orientation - return (parts.opening) - ? core::sequential( - rec(deps.toCoreDependencies(),Params{parts.c,deps}), - core::parallel( - rec(deps.toCoreDependencies(),Params{parts.l,deps}), - rec(deps.toCoreDependencies(),Params{parts.r,deps}) - ) - ) - : core::sequential( - core::parallel( - rec(deps.toCoreDependencies(),Params{parts.l,deps}), - rec(deps.toCoreDependencies(),Params{parts.r,deps}) - ), - rec(deps.toCoreDependencies(),Params{parts.c,deps}) - ); - - - }, - [&](const Params& params, const auto&) { - // provide sequential alternative - params.zoid.forEach(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - } - ) - )(deps.toCoreDependencies(),Params{*this,deps}); - - } - - template - core::treeture pforEach(const EvenOd& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserverOp& evenObserver, const OddObserverOp& oddObserver, const Size& limits) const { - // run the pforEach with no initial dependencies - return pforEach(ZoidDependencies(),even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - } - - - /** - * The height of this zoid in temporal direction. 
- */ - std::size_t getHeight() const { - return std::size_t(t_end-t_begin); - } - - /** - * Compute the number of elements this volume is covering - * when being projected to the space domain. - */ - int getFootprint() const { - int size = 1; - int dt = getHeight(); - for(std::size_t i=0; i 4*getHeight(); - } - - // the result of a time split - struct TimeDecomposition { - Zoid top; - Zoid bottom; - }; - - /** - * Splits this zoid in two sub-zoids along the time dimension. The - * First component will be the bottom, the second the top. - */ - TimeDecomposition splitTime() const { - auto split = getHeight() / 2; - - Base mid = base; - - for(std::size_t i=0; imax_width) { - max_width = width; - max_dim = i; - } - } - - // the max dimension is the split dimensin - auto split_dim = max_dim; - - // check whether longest dimension can be split - assert(isSplitable(split_dim)); - - // create 3 fragments - SpaceDecomposition res { - *this, *this, *this, (slopes[split_dim] < 0) - }; - - // get the split point - auto center = (base.boundaries[split_dim].begin + base.boundaries[split_dim].end) / 2; - auto left = center; - auto right = center; - - if (slopes[split_dim] < 0) { - auto hight = getHeight(); - left -= hight; - right += hight; - } - - res.l.base.boundaries[split_dim].end = left; - res.c.base.boundaries[split_dim] = { left, right }; - res.r.base.boundaries[split_dim].begin = right; - - // invert direction of center piece - res.c.slopes[split_dim] *= -1; - - // return decomposition - return res; - } - - }; - - /** - * A utility class for enumerating the dependencies of a task in a - * n-dimensional top-level task graph. - */ - template - struct task_dependency_extractor { - - template - void operator()(const Body& body, const Args& ... args) { - task_dependency_extractor nested; - if (taskIdx & (1< - struct task_dependency_extractor { - - template - void operator()(const Body& body, const Args& ... 
args) { - if (taskIdx & 0x1) { - body(args...,taskIdx & ~0x1); - } else { - body(args...); - } - } - - }; - - - /** - * A utility class for enumerating the dependencies of a task in a - * n-dimensional top-level task graph. - */ - template - struct task_dependency_enumerator { - - template - void operator()(const Body& body) { - for(std::size_t i=0;i<=Dims;i++) { - visit(body,i); - } - } - - template - void visit(const Body& body,std::size_t numBits) { - task_dependency_enumerator().visit(body,numBits); - if ((std::size_t)(utils::countOnes(taskIdx))==numBits) { - task_dependency_extractor()(body,taskIdx); - } - } - - }; - - template - struct task_dependency_enumerator { - - template - void visit(const Body& body,std::size_t numBits) { - if (numBits == 0) { - task_dependency_extractor<0,Dims-1>()(body,0); - } - } - - }; - - /** - * A utility to statically enumerate the tasks and dependencies for - * the top-level zoid task decomposition scheme. On the top level, - * the set of tasks and its dependencies are isomorph to the vertices - * and edges in a n-dimensional hyper cube. This utility is enumerating - * those edges, as well as listing its predecessors according to the - * sub-set relation. 
- */ - template - struct task_graph_enumerator { - - template - void operator()(const Body& body) { - task_dependency_enumerator enumerator; - enumerator(body); - } - - }; - - - template - class ExecutionPlan { - - using zoid_type = Zoid; - - // the execution plan of one layer -- represented as an embedded hyper-cube - using layer_plan = std::array; - - // the list of execution plans of all layers - std::vector layers; - - public: - - template - void runSequential(const EvenOp& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserver& evenObserver, const OddObserver& oddObserver, const Size& limits) const { - const std::size_t num_tasks = 1 << Dims; - - // fill a vector with the indices of the tasks - std::array order; - for(std::size_t i = 0; i - core::treeture runParallel(const EvenOp& even, const OddOp& odd, const EvenBoundaryOp& evenBoundary, const OddBoundaryOp& oddBoundary, const EvenObserver& evenObserver, const OddObserver& oddObserver, const Size& limits) const { - - const std::size_t num_tasks = 1 << Dims; - - // start tasks with mutual dependencies - core::treeture last = core::done(); - for(const auto& cur : layers) { - - std::array,num_tasks> jobs; - - // walk through graph dependency graph - enumTaskGraph([&](std::size_t idx, const auto& ... deps){ - - // special case handling for first task (has to depend on previous task) - if (idx == 0) { - // create first task - jobs[idx] = (last.isDone()) - ? 
cur[idx].pforEach(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits) - : cur[idx].pforEach(ZoidDependencies(last),even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - return; - } - - // create this task with corresponding dependencies - jobs[idx] = cur[idx].pforEach(ZoidDependencies(jobs[deps]...),even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,limits); - - }); - - // update last - last = std::move(jobs.back()); - } - - // return handle to last task - return last; - - } - - static ExecutionPlan create(const Base& base, std::size_t steps) { - - // get size of structure - auto size = base.extend(); - - // the the smallest width (this is the limiting factor for the height) - auto width = base.getMinimumWidth(); - - // get the height of the largest zoids, thus the height of each layer - auto height = width/2; - - // compute base area partitioning - struct split { - typename Base::range left; - typename Base::range right; - }; - std::array splits; - for(std::size_t i = 0; i(t0+height,steps); - - // create the list of zoids in this step - plan.layers.emplace_back(); - layer_plan& zoids = plan.layers.back(); - - // generate binary patterns from 0 to 2^dims - 1 - for(size_t i=0; i < (1< curBase = base; - Slopes slopes; - - // move base to center on field, edge, or corner - for(size_t j=0; j(curBase, slopes, t0, t1); - } - - } - - // build the final result - return plan; - } - - template - static void enumTaskGraph(const Body& body) { - task_graph_enumerator()(body); - } - - private: - - static std::size_t getNumBitsSet(std::size_t mask) { - return utils::countOnes((unsigned)mask); - } - - }; - - - template - struct container_info_base { - constexpr static const unsigned dimensions = Dims; - using base_type = Base; - }; - - - template - struct container_info : public container_info_base<1> { - using index_type = detail::index_type; - }; - - template - struct container_info> : public container_info_base { - using index_type = 
utils::Vector; - }; - - template - struct container_info> : public container_info_base { - using index_type = utils::Vector; - }; - - template - struct coordinate_converter { - auto& operator()(const Coordinate<1>& pos) { - return pos[0]; - } - }; - - template - struct coordinate_converter> { - auto& operator()(const Coordinate& pos) { - return pos; - } - }; - - template - struct coordinate_converter> { - auto& operator()(const Coordinate& pos) { - return pos; - } - }; - - } - - struct sequential_recursive { - - template - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... observers) { - - using namespace detail; - - const unsigned dims = container_info::dimensions; - using base_t = typename container_info::base_type; - - // iterative implementation - Container b(a.size()); - - // TODO: - // - switch internally to cache-oblivious access pattern (optional) - - // get size of structure - base_t base = base_t::full(a.size()); - auto size = base.extend(); - - // wrap update function into zoid-interface adapter - auto even = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = inner(t,p,a); - }; - - auto odd = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = inner(t,p,b); - }; - - auto evenBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = boundary(t,p,a); - }; - - auto oddBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = boundary(t,p,b); - }; - - auto evenObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - 
[&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,b[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... }; - }; - - auto oddObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - [&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,a[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... }; - }; - - // get the execution plan - auto exec_plan = ExecutionPlan::create(base,steps); - - // process the execution plan - exec_plan.runSequential(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,size); - - - // make sure the result is in the a copy - if (steps % 2) { - std::swap(a,b); - } - - // done - return {}; - } - }; - - - struct parallel_recursive { - - template - stencil_reference process(Container& a, std::size_t steps, const InnerUpdate& inner, const BoundaryUpdate& boundary, const Observers& ... 
observers) { - - using namespace detail; - - const unsigned dims = container_info::dimensions; - using base_t = typename container_info::base_type; - - // iterative implementation - Container b(a.size()); - - // TODO: - // - switch internally to cache-oblivious access pattern (optional) - // - make parallel with fine-grained dependencies - - // get size of structure - base_t base = base_t::full(a.size()); - auto size = base.extend(); - - // wrap update function into zoid-interface adapter - auto even = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = inner(t,p,a); - }; - - auto odd = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = inner(t,p,b); - }; - - auto evenBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - b[p] = boundary(t,p,a); - }; - - auto oddBoundary = [&](const Coordinate& pos, time_t t){ - coordinate_converter conv; - auto p = conv(pos); - a[p] = boundary(t,p,b); - }; - - auto evenObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - [&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,b[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... 
}; - }; - - auto oddObserver = [&](const Coordinate& from, const Coordinate& to, time_t t){ - - // create a operation handling one observer - __allscale_unused auto handler = [&](const auto& observer){ - // check whether this time step is of interest - if(!observer.isInterestedInTime(t)) return; - // walk through space - pfor(from,to, - [&](const Coordinate::dimensions>& i) { - coordinate_converter conv; - if (observer.isInterestedInLocation(i)) { - observer.trigger(t,i,a[conv(i)]); - } - } - ); - }; - - // process all observers - __allscale_unused auto l = { 0,(handler(observers),0)... }; - }; - - // get the execution plan - auto exec_plan = ExecutionPlan::create(base,steps); - - // process the execution plan - exec_plan.runParallel(even,odd,evenBoundary,oddBoundary,evenObserver,oddObserver,size).wait(); - - // make sure the result is in the a copy - if (steps % 2) { - std::swap(a,b); - } - - // done - return {}; - } - }; - - } // end namespace implementation - - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/algorithm/vcycle.h b/vendor/allscale/api/user/algorithm/vcycle.h deleted file mode 100644 index 95af0c925..000000000 --- a/vendor/allscale/api/user/algorithm/vcycle.h +++ /dev/null @@ -1,272 +0,0 @@ -#pragma once - -#include "allscale/api/user/algorithm/async.h" -#include "allscale/api/core/treeture.h" -#include "allscale/api/user/algorithm/internal/operation_reference.h" - -namespace allscale { -namespace api { -namespace user { -namespace algorithm { - - - // --------------------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------------------- - - - /** - * The VCycle utility enalbes the generic description of a arbitrarily deep V-cycle computation. 
- * Each stage is realized by a different specialization of the VCycleStage class, conducting the - * necessary reduction, computation, and prolongation steps to assemble the full VCycle. - */ - - class vcycle_reference; - - /** - * A generic v-cycle implementation enabling the creation of a vcycle solver by providing - * an implementation of a v-cycle stage body. - */ - template class StageBody, typename Mesh> - class VCycle; - - - // --------------------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------------------- - - - /** - * An entity to reference the full range of a scan. This token - * can not be copied and will wait for the completion of the scan upon destruction. - */ - class vcycle_reference : public internal::operation_reference { - - public: - - // inherit all constructors - using operation_reference::operation_reference; - - }; - - - namespace detail { - - - template< - typename Mesh, - template class StageBody, - unsigned Level, // the level covered by this instance - unsigned NumLevels // total number of levels - > - class VCycleStage { - - using stage_body = StageBody; - - using nested_stage_type = VCycleStage; - - using stage_body_type = StageBody; - - stage_body_type body; - - nested_stage_type nested; - - public: - - VCycleStage(const Mesh& mesh) - : body(mesh), nested(mesh) {} - - /** - * A function processing a single V-cycle starting at the current level. 
- */ - void run() { - // one iteration of the V cycle (actually very simple) - up(); // going up (fine to coarse) - down(); // going down (coarse to fine) - } - - void up() { - // forward call to nested - nested.up(); - body.restrictFrom(nested.getBody()); - body.computeFineToCoarse(); - } - - void down() { - body.prolongateTo(nested.getBody()); - nested.getBody().computeCoarseToFine(); - nested.down(); - } - - stage_body_type& getBody() { - return body; - } - - void prolongateFrom(const StageBody& parentBody) { - body.prolongateFrom(parentBody); - } - - template - typename std::enable_if&>::type - getStageBody() const { - return body; - } - - template - typename std::enable_if&>::type - getStageBody() const { - return nested.template getStageBody(); - } - - template - typename std::enable_if&>::type - getStageBody() { - return body; - } - - template - typename std::enable_if&>::type - getStageBody() { - return nested.template getStageBody(); - } - - template - void forEachStage(const Op& op) { - op(Level, this->body); - nested.forEachStage(op); - } - - template - void forEachStage(const Op& op) const { - op(Level, this->body); - nested.forEachStage(op); - } - - }; - - - template< - typename Mesh, - template class StageBody, - unsigned NumLevels // total number of levels - > - class VCycleStage { - - using stage_body_type = StageBody; - - stage_body_type body; - - public: - - VCycleStage(const Mesh& mesh) - : body(mesh) {} - - /** - * A function processing a single V-cycle starting at the current level. 
- */ - void run() { - // one iteration of the V cycle (actually very simple) - up(); // going up (fine to coarse) - down(); // going down (coarse to fine) - } - - void up() { - // just compute on this level - body.computeFineToCoarse(); - } - - void down() { - // nothing to do - } - - stage_body_type& getBody() { - return body; - } - - void prolongateTo(const StageBody& parentBody) { - body.prolongateTo(parentBody); - } - - template - typename std::enable_if<0==Lvl,const StageBody&>::type - getStageBody() const { - return body; - } - - template - typename std::enable_if<0==Lvl,StageBody&>::type - getStageBody() { - return body; - } - - template - void forEachStage(const Op& op) { - op(0, this->body); - } - - template - void forEachStage(const Op& op) const { - op(0, this->body); - } - - }; - - - } - - - - template< - template class StageBody, - typename Mesh - > - class VCycle { - - using top_stage_type = detail::VCycleStage; - - top_stage_type topStage; - - public: - - using mesh_type = Mesh; - - const mesh_type& mesh; - - VCycle(const mesh_type& mesh) : topStage(mesh), mesh(mesh) {} - - vcycle_reference run(std::size_t numCycles = 1) { - return async([&, numCycles]() { - // run the given number of cycles - for(std::size_t i = 0; i - const StageBody& getStageBody() const { - return topStage.template getStageBody(); - } - - template - StageBody& getStageBody() { - return topStage.template getStageBody(); - } - - template - void forEachStage(const Op& op) { - topStage.forEachStage(op); - } - - template - void forEachStage(const Op& op) const { - topStage.forEachStage(op); - } - - }; - - -} // end namespace algorithm -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/arithmetic.h b/vendor/allscale/api/user/arithmetic.h deleted file mode 100644 index 4f812586e..000000000 --- a/vendor/allscale/api/user/arithmetic.h +++ /dev/null @@ -1,45 +0,0 @@ - -#pragma once - -#include -#include - -#include 
"allscale/api/core/treeture.h" - - -namespace allscale { -namespace api { -namespace user { - - // --- specific aggregators --- - - - template() + std::declval())> - auto add(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return a + b; }); - } - - template() - std::declval())> - auto sub(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return a - b; }); - } - - template() * std::declval())> - auto mul(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return a * b; }); - } - - - template(),std::declval()))> - auto min(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return std::min(a,b); }); - } - - template(),std::declval()))> - auto max(A&& a, B&& b) { - return core::combine(std::move(a),std::move(b),[](const R& a, const R& b) { return std::max(a,b); }); - } - -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/adaptive_grid.h b/vendor/allscale/api/user/data/adaptive_grid.h deleted file mode 100644 index 802aabe35..000000000 --- a/vendor/allscale/api/user/data/adaptive_grid.h +++ /dev/null @@ -1,577 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/api/core/data.h" - -#include "allscale/api/user/data/grid.h" -#include "allscale/api/user/algorithm/pfor.h" - -#include "allscale/utils/assert.h" -#include "allscale/utils/serializer.h" -#include "allscale/utils/static_grid.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - template - using AdaptiveGridSharedData = GridSharedData; - - template - using AdaptiveGridPoint = GridPoint; - - template - using AdaptiveGridBox = GridBox; - - 
template - using AdaptiveGridRegion = GridRegion; - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - template - struct layer; - - template - struct layers { - enum { - num_layers = sizeof...(Layers) - }; - }; - - // structures for each Cell configuration and number of layers for nesting - template - struct CellConfig { - - enum { - dims = Dims, - num_layers = Layers::num_layers - }; - }; - - enum Direction { - Up, Down, Left, Right - }; - - namespace detail { - - template - struct size { - typedef size next; - }; - - template - struct make_size { - typedef typename make_size::type::next type; - }; - - template<> - struct make_size<0> { - typedef size<> type; - }; - - template - std::vector getBoundary(const Direction& dir, const utils::StaticGrid& data) { // returns vector of boundary data in each direction - int size[] = { Sizes... }; - int xSize = size[0]; - int ySize = size[1]; - switch(dir) { - case Up: { // returns data from top strip of domain to neighbor - std::vector res(xSize); - for(int i = 0; i < xSize; i++) - res[i] = data[{ i, ySize - 1}]; - return res; - } - case Down: { // returns data from bottom strip of domain to neighbor - std::vector res(xSize); - for(int i = 0; i < xSize; i++) - res[i] = data[{ i, 0 }]; - return res; - } - case Left: { - std::vector res(ySize); - for(int i = 0; i < ySize; i++) { - res[i] = data[{ 0, i }]; - } - return res; - } - case Right: { - std::vector res(ySize); - for(int i = 0; i < ySize; i++) - res[i] = data[{ xSize - 1, i }]; - return res; - } - } - return std::vector(); - } - - template - void setBoundary(const Direction& dir, utils::StaticGrid& data, const std::vector& boundary) { - int size[] = { Sizes... 
}; - int xSize = size[0]; - int ySize = size[1]; - - switch(dir) { - case Up: { - assert_eq(boundary.size(), (size_t)xSize); - for(int i = 0; i < xSize; i++) - data[{ i, ySize - 1 }] = boundary[i]; - return; - } - case Down: { - assert_eq(boundary.size(), (size_t)xSize); - for(int i = 0; i < xSize; i++) - data[{ i, 0 }] = boundary[i]; - return; - } - case Left: { - assert_eq(boundary.size(), (size_t)ySize); - for(int i = 0; i < ySize; i++) - data[{ 0, i }] = boundary[i]; - return; - } - case Right: { - assert_eq(boundary.size(), (size_t)ySize); - for(int i = 0; i < ySize; i++) - data[{ xSize - 1, i }] = boundary[i]; - return; - } - } - } - - - } // end namespace detail - - - template - struct GridLayerData; - - template - struct GridLayerData, layers, Rest...>> { - static_assert(sizeof...(Sizes) == sizeof...(Dims), "layer dimension has to be equal to the grid dimension."); - - using data_type = utils::StaticGrid; - using nested_type = GridLayerData, layers>; - using addr_type = allscale::utils::Vector; - - enum { layer_number = sizeof...(Rest) + 1 }; - - // the values to be stored on this layer - data_type data; - - // the nested layers - nested_type nested; - - unsigned getLayerNumber() const { return layer_number; } - - template - typename std::enable_if::type getLayer() { - return data; - } - - template - typename std::enable_if::type getLayer() const { - return data; - } - - template - typename std::enable_if < Layer())>::type getLayer() { - return nested.template getLayer(); - } - - template - typename std::enable_if < Layer(nested).template getLayer())>::type getLayer() const { - return nested.template getLayer(); - } - - T& getData(unsigned layer, const addr_type& addr) { - if(layer == getLayerNumber()) { - return data[addr]; - } - return nested.getData(layer, addr); - } - - const T& getData(unsigned layer, const addr_type& addr) const { - if(layer == getLayerNumber()) { - return data[addr]; - } - return nested.getData(layer, addr); - } - - 
allscale::utils::Vector getLayerSize(unsigned layer) const { - if(layer == getLayerNumber()) { - return data.size(); - } - return nested.getLayerSize(layer); - } - - template - void forAllOnLayer(unsigned layer, const Op& op) const { - if(layer == getLayerNumber()) { - // apply it to this value - data.forEach(op); - } else { - nested.forAllOnLayer(layer, op); - } - } - - template - void forAllOnLayer(unsigned layer, const Op& op) { - if(layer == getLayerNumber()) { - // apply it to this value - data.forEach(op); - } else { - nested.forAllOnLayer(layer, op); - } - } - - template - void refineFromLayer(unsigned layer, const Refiner& refiner) { - if(layer == getLayerNumber()) { - // iterate over cells on nested layer - api::user::algorithm::detail::forEach({0}, nested.data.size(), [&](const auto& index) -> void { - // using the index of a cell on nested layer, computes index covering cell on this layer - auto newIndex = utils::elementwiseDivision(index, utils::elementwiseDivision(nested.data.size(), data.size())); - // simply replicate data to cell on nested layer - nested.data[index] = refiner(data[newIndex]); - }); - } else { - nested.refineFromLayer(layer, refiner); - } - } - - template - void refineFromLayerGrid(unsigned layer, const Refiner& refiner) { - if(layer == getLayerNumber()) { - // using the index of a cell on this layer, computes index of first covered cell on nested layer - auto indexer = [&](const auto& index) { return utils::elementwiseProduct(index, utils::elementwiseDivision(nested.data.size(), data.size())); }; - - // iterate over cells on this layer - api::user::algorithm::detail::forEach({ 0 }, data.size(), [&](const auto& index) -> void { - const auto& res = refiner(data[index]); - auto begin = indexer(index); - auto end = indexer(index + decltype(index){1}); - api::user::algorithm::detail::forEach(begin, end, [&](const auto& i) { - nested.data[i] = res[i-indexer(index)]; - }); - }); - - } else { - nested.refineFromLayerGrid(layer, refiner); - 
} - } - - template - void coarsenToLayer(unsigned layer, const Coarsener& coarsener) { - if(layer == getLayerNumber()) { - // using the index of a cell on this layer, computes index of first covered cell on nested layer - auto indexer = [&](const auto& index) { return utils::elementwiseProduct(index, utils::elementwiseDivision(nested.data.size(), data.size())); }; - - // iterate over cells on this layer - api::user::algorithm::detail::forEach({ 0 }, data.size(), [&](const auto& index) -> void { - T sum = T(); - // iterate over subset of cells on nested layer, to be projected to the current cell pointed to by index - auto begin = indexer(index); - auto end = indexer(index + decltype(index){1}); - api::user::algorithm::detail::forEach(begin, end, [&](const auto& i) -> void { - sum += coarsener(nested.data[i]); - }); - // compute divisor for average - unsigned result = 1; - (void)std::initializer_list{ (result *= Dims, 0u)... }; - data[index] = sum / result; - }); - - } else { - nested.coarsenToLayer(layer, coarsener); - } - } - - template - void coarsenToLayerGrid(unsigned layer, const Coarsener& coarsener) { - if(layer == getLayerNumber()) { - // using the index of a cell on this layer, computes index of first covered cell on nested layer - auto indexer = [&](const auto& index) { return utils::elementwiseProduct(index, utils::elementwiseDivision(nested.data.size(), data.size())); }; - - // iterate over cells on this layer - utils::StaticGrid param; - api::user::algorithm::detail::forEach({ 0 }, data.size(), [&](const auto& index) -> void { - // iterate over subset of cells on nested layer, to be projected to the current cell pointed to by index - auto begin = indexer(index); - auto end = indexer(index + decltype(index){1}); - api::user::algorithm::detail::forEach(begin, end, [&](const decltype(index)& i) -> void { - param[i - indexer(index)] = nested.data[i]; - }); - data[index] = coarsener(param); - }); - - } else { - nested.coarsenToLayerGrid(layer, coarsener); - 
} - } - - std::vector getBoundary(unsigned layer, Direction dir) const { // returns vector of boundary data in each direction - if(layer == getLayerNumber()) { - return detail::getBoundary(dir, data); - } - return nested.getBoundary(layer, dir); - } - - void setBoundary(unsigned layer, Direction dir, const std::vector& boundary) { - if(layer == getLayerNumber()) { - detail::setBoundary(dir, data, boundary); - } else { - nested.setBoundary(layer, dir, boundary); - } - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(data); - writer.write(nested); - } - - static GridLayerData load(utils::ArchiveReader& reader) { - auto data = std::move(reader.read()); - auto nested = std::move(reader.read()); - return { data, nested }; - } - - }; - - template - struct GridLayerData, layers<>> { - - using data_type = utils::StaticGrid; - using addr_type = allscale::utils::Vector; - - // the values to be stored on this last layer - data_type data; - - unsigned getLayerNumber() const { return 0; } - - template - typename std::enable_if::type getLayer() { - return data; - } - - template - typename std::enable_if::type getLayer() const { - return data; - } - - T& getData(unsigned layer, const addr_type& addr) { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - return data[addr]; - } - - const T& getData(unsigned layer, const addr_type& addr) const { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - return data[addr]; - } - - allscale::utils::Vector getLayerSize(unsigned layer) const { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - return data.size(); - } - - template - void forAllOnLayer(unsigned layer, const Op& op) const { - assert_eq(layer, 0) << "Error: trying to access layer " << layer << " --no such layer!"; - data.forEach(op); - } - - template - void forAllOnLayer(unsigned layer, const Op& op) { - assert_eq(layer, 0) << "Error: 
trying to access layer " << layer << " --no such layer!"; - data.forEach(op); - } - - template - void refineFromLayer(unsigned layer, const Refiner&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - template - void refineFromLayerGrid(unsigned layer, const Refiner&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - template - void coarsenToLayer(unsigned layer, const Coarsener&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - template - void coarsenToLayerGrid(unsigned layer, const Coarsener&) { - assert_fail() << "Error: trying to access layer " << layer << " --no such layer!"; - } - - std::vector getBoundary(__allscale_unused unsigned layer, Direction dir) const { - assert_eq(0, layer) << "No such layer"; - return detail::getBoundary(dir, data); - } - - void setBoundary(__allscale_unused unsigned layer, Direction dir, const std::vector& boundary) { - assert_eq(0, layer) << "No such layer"; - detail::setBoundary(dir, data, boundary); - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(data); - } - - static GridLayerData, layers<>> load(utils::ArchiveReader& reader) { - GridLayerData, layers<>> grid; - grid.data = std::move(reader.read()); - - return grid; - } - - }; - - - template - struct AdaptiveGridCell; - - - template - struct AdaptiveGridCell> { - using element_type = T; - using unit_size = typename detail::make_size::type; - using addr_type = allscale::utils::Vector; - - AdaptiveGridCell() = default; - AdaptiveGridCell(const AdaptiveGridCell& other) = delete; - AdaptiveGridCell(AdaptiveGridCell&& other) = default; - - // determines the active layer of this grid cell - unsigned active_layer = 0; - - // the data stored in - GridLayerData data; - - AdaptiveGridCell& operator=(const AdaptiveGridCell& other) { - if(this == &other) return *this; - active_layer = other.active_layer; - data = other.data; - 
return *this; - } - - AdaptiveGridCell& operator=(const T& value) { - // update all active cells - data.forAllOnLayer(active_layer,[&](T& cur){ - cur = value; - }); - return *this; - } - - T& operator[](const addr_type& addr) { - return data.getData(active_layer, addr); - } - - const T& operator[](const addr_type& addr) const { - return data.getData(active_layer, addr); - } - - allscale::utils::Vector getActiveLayerSize() const { - return data.getLayerSize(active_layer); - } - - void setActiveLayer(unsigned level) { - active_layer = level; - } - - unsigned getActiveLayer() const { - return active_layer; - } - - template - auto getLayer() -> decltype(data.template getLayer())& { - return data.template getLayer(); - } - - template - auto getLayer() const -> const decltype(data.template getLayer())& { - return data.template getLayer(); - } - - template - void forAllActiveNodes(const Op& op) const { - data.forAllOnLayer(active_layer, op); - } - - template - void forAllActiveNodes(const Op& op) { - data.forAllOnLayer(active_layer, op); - } - - template - void refine(const Refiner& refiner) { - assert_gt(active_layer, 0) << "Cannot refine any further"; - data.refineFromLayer(active_layer, refiner); - active_layer--; - } - - template - void refineGrid(const Refiner& refiner) { - assert_gt(active_layer, 0) << "Cannot refine any further"; - data.refineFromLayerGrid(active_layer, refiner); - active_layer--; - } - - template - void coarsen(const Coarsener& coarsener) { - assert_gt(Layers::num_layers, active_layer) << "Cannot coarsen any further"; - active_layer++; - data.coarsenToLayer(active_layer, coarsener); - } - - template - void coarsenGrid(const Coarsener& coarsener) { - assert_gt(Layers::num_layers, active_layer) << "Cannot coarsen any further"; - active_layer++; - data.coarsenToLayerGrid(active_layer, coarsener); - } - - std::vector getBoundary(Direction dir) const { - return data.getBoundary(active_layer, dir); - } - - void setBoundary(Direction dir, const 
std::vector& boundary) { - data.setBoundary(active_layer, dir, boundary); - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(active_layer); - writer.write(data); - } - - static AdaptiveGridCell load(utils::ArchiveReader& reader) { - AdaptiveGridCell cell; - cell.active_layer = std::move(reader.read()); - cell.data = reader.read>(); - return cell; - } - - }; - - template - using AdaptiveGridFragment = GridFragment, CellConfig::dims>; - - template - using AdaptiveGrid = Grid, CellConfig::dims>; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/grid.h b/vendor/allscale/api/user/data/grid.h deleted file mode 100644 index 06c052bc7..000000000 --- a/vendor/allscale/api/user/data/grid.h +++ /dev/null @@ -1,623 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/printer/join.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - - using coordinate_type = std::int64_t; - - template - using GridPoint = utils::Vector; - - template - class GridBox; - - template - class GridRegion; - - template - class GridFragment; - - template - class Grid; - - - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - namespace detail { - - template - struct difference_computer { - - template - void collectDifferences(const GridBox& a, const GridBox& b, GridBox& cur, std::vector>& res) { - std::size_t i = I-1; - - // if b is within a - if (a.min[i] <= b.min[i] && b.max[i] <= a.max[i]) { - - // cover left part - cur.min[i] = 
a.min[i]; cur.max[i] = b.min[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover center part - cur.min[i] = b.min[i]; cur.max[i] = b.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover right part - cur.min[i] = b.max[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // if a is within b - } else if (b.min[i] <= a.min[i] && a.max[i] <= b.max[i]) { - - // cover inner part - cur.min[i] = a.min[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // if a is on the left - } else if (a.min[i] <= b.min[i]) { - - // cover left part - cur.min[i] = a.min[i]; cur.max[i] = b.min[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover right part - cur.min[i] = b.min[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // otherwise a is on the right - } else { - - // cover left part - cur.min[i] = a.min[i]; cur.max[i] = b.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - // cover right part - cur.min[i] = b.max[i]; cur.max[i] = a.max[i]; - if (cur.min[i] < cur.max[i]) difference_computer().collectDifferences(a,b,cur,res); - - } - - } - - }; - - template<> - struct difference_computer<0> { - - template - void collectDifferences(const GridBox&, const GridBox& b, GridBox& cur, std::vector>& res) { - if(!b.covers(cur) && !cur.empty()) res.push_back(cur); - } - }; - - template - struct box_fuser { - template - bool apply(std::vector>& boxes) { - - // try fuse I-th dimension - for(std::size_t i = 0; i& a = boxes[i]; - GridBox& b = boxes[j]; - if (GridBox::template areFusable(a,b)) { - - // fuse the boxes - GridBox f = GridBox::template fuse(a,b); - boxes.erase(boxes.begin() + j); - boxes[i] = f; - 
- // start over again - apply(boxes); - return true; - } - } - } - - // fuse smaller dimensions - if (box_fuser().apply(boxes)) { - // start over again - apply(boxes); - return true; - } - - // no more changes - return false; - } - }; - - template<> - struct box_fuser<0> { - template - bool apply(std::vector>&) { return false; } - }; - - template - struct line_scanner { - template - void apply(const GridBox& box, GridPoint& a, GridPoint& b, const Lambda& body) { - for(coordinate_type i = box.min[Dims-I]; i < box.max[Dims-I]; ++i ) { - a[Dims-I] = i; - b[Dims-I] = i; - line_scanner().template apply(box,a,b,body); - } - } - }; - - template<> - struct line_scanner<1> { - template - void apply(const GridBox& box, GridPoint& a, GridPoint& b, const Lambda& body) { - a[Dims-1] = box.min[Dims-1]; - b[Dims-1] = box.max[Dims-1]; - body(a,b); - } - }; - } - - - template - class GridBox { - - static_assert(Dims >= 1, "0-dimension Grids (=Scalars) not yet supported."); - - template - friend struct detail::difference_computer; - - template - friend struct detail::line_scanner; - - template - friend class GridRegion; - - using point_type = GridPoint; - - point_type min; - point_type max; - - public: - GridBox() {} - - GridBox(coordinate_type N) - : min(0), max(N) {} - - GridBox(coordinate_type A, coordinate_type B) - : min(A), max(B) {} - - GridBox(const point_type& N) - : min(0), max(N) {} - - GridBox(const point_type& A, const point_type& B) - : min(A), max(B) {} - - bool empty() const { - return !min.strictlyDominatedBy(max); - } - - std::size_t area() const { - std::size_t res = 1; - for(std::size_t i=0; i merge(const GridBox& a, const GridBox& b) { - - // handle empty sets - if (a.empty() && b.empty()) return std::vector(); - if (a.empty()) return std::vector({b}); - if (b.empty()) return std::vector({a}); - - // boxes are intersecting => we have to do some work - auto res = difference(a,b); - res.push_back(b); - return res; - } - - static GridBox intersect(const GridBox& a, 
const GridBox& b) { - // compute the intersection - GridBox res = a; - for(std::size_t i = 0; i difference(const GridBox& a, const GridBox& b) { - - // handle case where b covers whole a - if (b.covers(a)) return std::vector(); - - // check whether there is an actual intersection - if (!a.intersectsWith(b)) { - return std::vector({a}); - } - - // slice up every single dimension - GridBox cur; - std::vector res; - detail::difference_computer().collectDifferences(a,b,cur,res); - return res; - } - - static GridBox span(const GridBox& a, const GridBox& b) { - return GridBox( - allscale::utils::elementwiseMin(a.min,b.min), - allscale::utils::elementwiseMax(a.max,b.max) - ); - } - - template - void scanByLines(const Lambda& body) const { - if (empty()) return; - point_type a; - point_type b; - detail::line_scanner().template apply(*this,a,b,body); - } - - template - static bool areFusable(const GridBox& a, const GridBox& b) { - static_assert(D < Dims, "Can not fuse on non-existing dimension."); - if (a.min > b.min) return areFusable(b,a); - if (a.max[D] != b.min[D]) return false; - for(std::size_t i = 0; i - static GridBox fuse(const GridBox& a, const GridBox& b) { - assert_true(areFusable(a,b)); - if (a.min[D] > b.min[D]) return fuse(b,a); - GridBox res = a; - res.max[D] = b.max[D]; - return res; - } - - friend std::ostream& operator<<(std::ostream& out, const GridBox& box) { - return out << "[" << box.min << " - " << box.max << "]"; - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static GridBox load(utils::ArchiveReader& reader) { - auto min = reader.read(); - auto max = reader.read(); - return { min, max }; - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter& writer) const { - writer.write(min); - writer.write(max); - } - - /** - * Added by psalz for CELERITY on 2018/03/19. 
- */ - const point_type& get_min() const { return min; } - const point_type& get_max() const { return max; } - - /** - * Added by psalz for CELERITY on 2020/07/13. - */ - point_type& get_min() { return min; } - point_type& get_max() { return max; } - - }; - - template - class GridRegion { - - static_assert(Dims > 0, "0-dimensional grids are not supported yet"); - - using point_type = GridPoint; - using box_type = GridBox; - - std::vector regions; - - public: - - GridRegion() {} - - GridRegion(coordinate_type N) - : regions({box_type(N)}) { - if (0 >= N) regions.clear(); - } - - GridRegion(const point_type& size) - : regions({box_type(0,size)}) { - if (regions[0].empty()) regions.clear(); - } - - GridRegion(const point_type& min, const point_type& max) - : regions({box_type(min,max)}) { - assert_true(min.dominatedBy(max)); - if (regions[0].empty()) regions.clear(); - } - - GridRegion(const box_type& box) - : regions({box}) { - if (regions[0].empty()) regions.clear(); - } - - GridRegion(const GridRegion&) = default; - GridRegion(GridRegion&&) = default; - - GridRegion& operator=(const GridRegion&) = default; - GridRegion& operator=(GridRegion&&) = default; - - static GridRegion single(const point_type& p) { - return GridRegion(p,p+point_type(1)); - } - - box_type boundingBox() const { - // handle empty region - if (regions.empty()) return box_type(0); - - // if there is a single element - if (regions.size() == 1u) return regions.front(); - - // compute the bounding box - box_type res = regions.front(); - for(const box_type& cur : regions) { - res.min = utils::elementwiseMin(res.min, cur.min); - res.max = utils::elementwiseMax(res.max, cur.max); - } - return res; - } - - bool operator==(const GridRegion& other) const { - return difference(*this,other).empty() && other.difference(other,*this).empty(); - } - - bool operator!=(const GridRegion& other) const { - return regions != other.regions; - } - - bool empty() const { - return regions.empty(); - } - - std::size_t 
area() const { - std::size_t res = 0; - for(const auto& cur : regions) { - res += cur.area(); - } - return res; - } - - static GridRegion merge(const GridRegion& a, const GridRegion& b) { - - // if both sets are empty => done - if(a.empty() && b.empty()) return a; - - // build result - GridRegion res = a; - - // combine regions - for(const auto& cur : difference(b,a).regions) { - res.regions.push_back(cur); - } - - // compress result - res.compress(); - - // done - return res; - } - - template - static GridRegion merge(const GridRegion& a, const GridRegion& b, const Rest& ... rest) { - return merge(merge(a,b),rest...); - } - - static GridRegion intersect(const GridRegion& a, const GridRegion& b) { - - // if one of the sets is empty => done - if(a.empty()) return a; - if(b.empty()) return b; - - // build result - GridRegion res; - - // combine regions - for(const auto& curA : a.regions) { - for(const auto& curB : b.regions) { - box_type diff = box_type::intersect(curA,curB); - if (!diff.empty()) { - res.regions.push_back(diff); - } - } - } - - // compress result - res.compress(); - - // done - return res; - } - - static GridRegion difference(const GridRegion& a, const GridRegion& b) { - - // handle empty sets - if(a.empty() || b.empty()) return a; - - - // build result - GridRegion res = a; - - // combine regions - for(const auto& curB : b.regions) { - std::vector next; - for(const auto& curA : res.regions) { - for(const auto& n : box_type::difference(curA,curB)) { - next.push_back(n); - } - } - res.regions.swap(next); - } - - // compress result - res.compress(); - - // done - return res; - } - - static GridRegion span(const GridRegion& a, const GridRegion& b) { - GridRegion res; - for(const auto& ba : a.regions) { - for(const auto& bb : b.regions) { - res = merge(res,GridRegion(box_type::span(ba,bb))); - } - } - return res; - } - - /** - * Scans the covered range, line by line. 
- */ - template - void scanByLines(const Lambda& body) const { - for(const auto& cur : regions) { - cur.scanByLines(body); - } - } - - /** - * Scan the covered range, point by point. - */ - template - void scan(const Lambda& body) const { - scanByLines([&](point_type a, const point_type& b) { - for(; a[Dims-1] - void scanByBoxes(const Lambda& f) const { - for(const auto& cur : regions) { - f(cur); - } - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static GridRegion load(utils::ArchiveReader& reader) { - // start with an empty region - GridRegion res; - - // read the box entries - res.regions = std::move(reader.read>()); - - // done - return res; - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter& writer) const { - // just save the regions - writer.write(regions); - } - - friend std::ostream& operator<<(std::ostream& out, const GridRegion& region) { - return out << "{" << utils::join(",",region.regions) << "}"; - } - - private: - - void compress() { - // try to fuse boxes - detail::box_fuser().apply(regions); - } - - }; - - - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale - diff --git a/vendor/allscale/api/user/data/map.h b/vendor/allscale/api/user/data/map.h deleted file mode 100644 index 11ea47afe..000000000 --- a/vendor/allscale/api/user/data/map.h +++ /dev/null @@ -1,335 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "allscale/api/core/data.h" -#include "allscale/utils/assert.h" - -#include "allscale/utils/printer/set.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - /** - * This header file defines an example data item covering a generic map of key-value pairs. 
- * The corresponding elements are: - * - a range type which corresponds to a set of keys - * - a fragment type capable of storing a share of the represented data - * - a facade type to be offered to the user as an interface - */ - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - - - template - class SetRegion; - - template - class Map; - - template - class MapFragment; - - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - /** - * The implementation of a set-region enumerating the covered elements explicitly. - * - * @tparam Element the type of element to describe an element within the set; the type - * has to be serializable - */ - template - class SetRegion { - - /** - * The elements covered by this region, explicitly enumerated. - */ - std::set elements; - - public: - - /** - * Adds a new element to this region. - */ - void add(const Element& e) { - elements.insert(e); - } - - /** - * Add multiple elements at once. - */ - template - void add(const Element& e, const Rest& ... rest) { - add(e); add(rest...); - } - - /** - * Terminal case for adding multiple elements. - */ - void add() { /* nothing */ } - - /** - * Obtains a list of all covered elements. - */ - const std::set& getElements() const { - return elements; - } - - // -- requirements imposed by the region concept -- - - /** - * Determines whether this region is empty. - */ - bool empty() const { - return elements.empty(); - } - - /** - * A comparison operator comparing regions on equality. - */ - bool operator==(const SetRegion& other) const { - return elements == other.elements; - } - - /** - * A comparison operator comparing regions for inequality. 
- */ - bool operator!=(const SetRegion& other) const { - return !(*this == other); - } - - /** - * An operator to merge two set regions. - */ - static SetRegion merge(const SetRegion& a, const SetRegion& b) { - SetRegion res; - std::set_union(a.elements.begin(),a.elements.end(),b.elements.begin(),b.elements.end(),std::inserter(res.elements, res.elements.begin())); - return res; - } - - /** - * An operator to intersect two set regions. - */ - static SetRegion intersect(const SetRegion& a, const SetRegion& b) { - SetRegion res; - std::set_intersection(a.elements.begin(), a.elements.end(), b.elements.begin(), b.elements.end(), std::inserter(res.elements, res.elements.begin())); - return res; - } - - /** - * An operator to compute the set-difference of two set regions. - */ - static SetRegion difference(const SetRegion& a, const SetRegion& b) { - SetRegion res; - std::set_difference(a.elements.begin(), a.elements.end(), b.elements.begin(), b.elements.end(), std::inserter(res.elements, res.elements.begin())); - return res; - } - - static SetRegion span(const SetRegion&, const SetRegion&) { - std::cout << "Unsupported operation: cannot computed span on set regions!"; - exit(1); - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static SetRegion load(utils::ArchiveReader&) { - assert_not_implemented(); - return SetRegion(); - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - // nothing so far - } - - /** - * Enables printing the elements of this set region. - */ - friend std::ostream& operator<<(std::ostream& out, const SetRegion& region) { - return out << region.elements; - } - }; - - /** - * An implementation of a fragment of a map-like data item. Each fragment - * stores a sub-section of the key-value pairs to be maintained by the overall map. 
- * - * @tparam Key the key type of the map to be stored - * @tparam Value the value type of the data to be associated to the key - */ - template - class MapFragment { - - /** - * The region this fragment is covering. - */ - SetRegion region; - - /** - * The data stored in this fragment. - */ - std::map data; - - // enables the facade to access internal data of this class. - friend class Map; - - public: - - using shared_data_type = core::no_shared_data; - using facade_type = Map; - using region_type = SetRegion; - - /** - * Create a new fragment covering the given region. - */ - MapFragment(const region_type& region) - : MapFragment(core::no_shared_data(),region) {} - - /** - * Create a new fragment covering the given region. - */ - MapFragment(const core::no_shared_data&, const region_type& region) : region(region) { - for(const auto& key : region.getElements()) { - data[key]; // initialize content by accessing elements - } - } - - /** - * Obtains a facade to this fragment to be forwarded by the data manager to the user code - * for interacting with this fragment. - */ - Map mask() { - return Map(*this); - } - - /** - * Obtains the range of data covered by this fragment. - */ - const region_type& getCoveredRegion() const { - return region; - } - - /** - * Resizes this fragment to provide enough space to store values for the given key-set. - */ - void resize(const region_type& keys) { - - // update the covered region - region = keys; - - // build up new data storage - std::map newData; - for(const auto& key : keys.getElements()) { - auto pos = data.find(key); - newData[key] = (pos != data.end()) ? pos->second : Value(); - } - - // swap data containers - data.swap(newData); - } - - /** - * Merges all the data from the given fragment into this fragment. 
- */ - void insert(const MapFragment& other, const region_type& fraction) { - assert_true(core::isSubRegion(fraction,region)) - << "Cannot insert non-sub-set region into this fragment."; - assert_true(core::isSubRegion(fraction,other.region)) - << "Cannot load non-sub-set region from other fragment."; - // move in data - for(const auto& cur : fraction.getElements()) { - auto pos = other.data.find(cur); - assert_true(pos != other.data.end()); - data[cur] = pos->second; - } - } - - void extract(utils::ArchiveWriter&, const region_type&) const { - assert_not_implemented(); - } - - void insert(utils::ArchiveReader&) { - assert_not_implemented(); - } - - }; - - - /** - * The map facade forming the actual data item to be managed by the - * runtime system. - * - * @tparam Key a key type, needs to be serializable - * @tparam Value a value type, needs to be serializable as well - */ - template - class Map : public core::data_item> { - - /** - * A pointer to an underlying fragment owned if used in an unmanaged state. - */ - std::unique_ptr> owned; - - /** - * A reference to the fragment instance operating on, referencing the owned fragment or an externally managed one. - */ - MapFragment& base; - - /** - * Enables fragments to use the private constructor below. - */ - friend class MapFragment; - - /** - * The constructor to be utilized by the fragment to create a facade for an existing fragment. - */ - Map(MapFragment& base) : base(base) {} - - public: - - /** - * Creates a new map covering the given region. - */ - Map(const SetRegion& keys) : owned(std::make_unique>(keys)), base(*owned) {} - - /** - * Provides read/write access to one of the values stored within this map. - */ - Value& operator[](const Key& key) { - auto pos = base.data.find(key); - assert_true(pos != base.data.end()) << "Access to invalid key: " << key << " - covered region: " << base.region; - return pos->second; - } - - /** - * Provides read access to one of the values stored within this map. 
- */ - const Value& operator[](const Key& key) const { - auto pos = base.data.find(key); - assert_true(pos != base.data.end()) << "Access to invalid key: " << key << " - covered region: " << base.region; - return pos->second; - } - - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/mesh.h b/vendor/allscale/api/user/data/mesh.h deleted file mode 100644 index 14586b8f9..000000000 --- a/vendor/allscale/api/user/data/mesh.h +++ /dev/null @@ -1,3444 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/bitmanipulation.h" -#include "allscale/utils/io_utils.h" -#include "allscale/utils/range.h" -#include "allscale/utils/raw_buffer.h" -#include "allscale/utils/serializer.h" -#include "allscale/utils/static_map.h" -#include "allscale/utils/table.h" -#include "allscale/utils/array_utils.h" -#include "allscale/utils/tuple_utils.h" - -#include "allscale/utils/printer/vectors.h" - -#include "allscale/api/core/data.h" -#include "allscale/api/core/prec.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // -------------------------------------------------------------------- - // Declarations - // -------------------------------------------------------------------- - - - - // --- mesh type parameter constructs --- - - /** - * The base type of edges connecting nodes of kind A with nodes of kind B - * on the same level. - */ - template - struct edge { - using src_node_kind = A; - using trg_node_kind = B; - }; - - - /** - * The base type of edges connecting nodes of kind A with nodes of kind B - * on adjacent levels. - */ - template - struct hierarchy { - using parent_node_kind = A; - using child_node_kind = B; - }; - - /** - * The constructor for the list of node kinds to be included in a mesh structure. 
- */ - template - struct nodes { - enum { size = sizeof...(Nodes) }; - }; - - /** - * The constructor for the list of edge kinds to be included in a mesh structure. - */ - template - struct edges { - enum { size = sizeof...(Edges) }; - }; - - /** - * The constructor for the list of hierarchies to be included in a mesh structure. - */ - template - struct hierarchies { - enum { size = sizeof...(Hierarchies) }; - }; - - - // --- mesh type parameter constructs --- - - - /** - * The type used for addressing nodes within meshes. - */ - template - struct NodeRef; - - /** - * The type used for iterating over lists of nodes, e.g. a list of adjacent nodes. - */ - template - using NodeList = utils::range*>; - - - /** - * The type for representing the topological information of a hierarchical mesh. - */ - template< - typename NodeKinds, // < list of node types in each level - typename EdgeKinds, // < list of edge types connecting nodes within levels - typename Hierarchies = hierarchies<>, // < list of edge types connecting nodes between adjacent levels - unsigned Levels = 1, // < number of levels in the hierarchy - unsigned PartitionDepth = 0 // < number of partitioning level - > - class Mesh; - - - /** - * The type for associating (dynamic) information to nodes within a mesh. - */ - template< - typename NodeKind, // < the type of node to be annotated - typename ElementType, // < the type of value to be associated to each node on the given level - unsigned Level, // < the level of the mesh to be annotated - typename PartitionTree // < the type of the partition tree indexing the associated mesh - > - class MeshData; - - - /** - * A utility to construct meshes. 
- */ - template< - typename NodeKinds, // < list of node types in each level - typename EdgeKinds, // < list of edge types connecting nodes within levels - typename Hierarchies = hierarchies<>, // < list of edge types connecting nodes between adjacent levels - unsigned Levels = 1 // < number of levels in the hierarchy - > - class MeshBuilder; - - - // -- mesh attributes -- - - /** - * The base type for mesh property kinds. - */ - template - struct mesh_property { - using node_kind = NodeKind; - using value_type = ValueType; - }; - - /** - * A container for a collection of mesh properties. A mesh property is - * a value associated to a certain kind of node on each level of a mesh. - * The MeshProperties container allows multiple properties to be managed - * within a single, consistent entity. - * - * To create an instance, the factory function "createProperties" of - * the Mesh structure has to be utilized. - */ - template - class MeshProperties; - - - - // -------------------------------------------------------------------- - // Definitions - // -------------------------------------------------------------------- - - // The type used for indexing nodes in meshes - using node_index_t = uint64_t; - - // The type used for identifying nodes within meshes. - struct NodeID { - - node_index_t id; - - NodeID() = default; - - constexpr explicit NodeID(node_index_t id) : id(id) {} - - operator node_index_t() const { - return id; - } - - node_index_t getOrdinal() const { - return id; - } - - bool operator==(const NodeID& other) const { - return id == other.id; - } - - bool operator!=(const NodeID& other) const { - return id != other.id; - } - - bool operator<(const NodeID& other) const { - return id < other.id; - } - - friend std::ostream& operator<<(std::ostream& out, const NodeID& ref) { - return out << "n" << ref.id; - } - - }; - - /** - * The type used for addressing nodes within meshes. 
- */ - template - struct NodeRef : public NodeID { - - using node_kind = Kind; - - enum { level = Level }; - - NodeRef() = default; - - constexpr explicit NodeRef(node_index_t id) - : NodeID(id) {} - - constexpr explicit NodeRef(NodeID id) - : NodeID(id) {} - - }; - - - template - class NodeRange { - - NodeRef _begin; - - NodeRef _end; - - public: - - NodeRange(const NodeRef& a, const NodeRef& b) : _begin(a), _end(b) { - assert_le(_begin.id,_end.id); - } - - NodeRange() : _begin(), _end() {} - - NodeRef getBegin() const { - return _begin; - } - - NodeRef getEnd() const { - return _end; - } - - NodeRef operator[](std::size_t index) const { - return NodeRef(NodeID(_begin.id + (node_index_t)index)); - } - - std::size_t size() const { - return _end.id - _begin.id; - } - - - class const_iterator : public std::iterator> { - - node_index_t cur; - - public: - - const_iterator(NodeID pos) : cur(pos) {}; - - bool operator==(const const_iterator& other) const { - return cur == other.cur; - } - - bool operator!=(const const_iterator& other) const { - return !(*this == other); - } - - bool operator<(const const_iterator& other) const { - return cur < other.cur; - } - - bool operator<=(const const_iterator& other) const { - return cur <= other.cur; - } - - bool operator>=(const const_iterator& other) const { - return cur >= other.cur; - } - - bool operator>(const const_iterator& other) const { - return cur > other.cur; - } - - NodeRef operator*() const { - return NodeRef{cur}; - } - - const_iterator& operator++() { - ++cur; - return *this; - } - - const_iterator operator++(int) { - const_iterator res = *this; - ++cur; - return res; - } - - const_iterator& operator--() { - --cur; - return *this; - } - - const_iterator operator--(int) { - const_iterator res = *this; - --cur; - return res; - } - - const_iterator& operator+=(std::ptrdiff_t n) { - cur += n; - return *this; - } - - const_iterator& operator-=(std::ptrdiff_t n) { - cur -= n; - return *this; - } - - friend const_iterator 
operator+(const_iterator& iter, std::ptrdiff_t n) { - const_iterator res = iter; - res.cur += n; - return res; - - } - - friend const_iterator& operator+(std::ptrdiff_t n, const_iterator& iter) { - const_iterator res = iter; - res.cur += n; - return res; - } - - const_iterator operator-(std::ptrdiff_t n) { - const_iterator res = *this; - res.cur -= n; - return res; - } - - std::ptrdiff_t operator-(const_iterator& other) const { - return std::ptrdiff_t(cur - other.cur); - } - - NodeRef operator[](std::ptrdiff_t n) const { - return *(*this + n); - } - - }; - - const_iterator begin() const { - return const_iterator(_begin); - } - - const_iterator end() const { - return const_iterator(_end); - } - - template - void forAll(const Body& body) { - for(const auto& cur : *this) { - body(cur); - } - } - - friend std::ostream& operator<<(std::ostream& out, const NodeRange& range) { - return out << "[" << range._begin.id << "," << range._end.id << ")"; - } - - }; - - - namespace detail { - - template - struct is_nodes : public std::false_type {}; - - template - struct is_nodes> : public std::true_type {}; - - template - struct is_edges : public std::false_type {}; - - template - struct is_edges> : public std::true_type {}; - - template - struct is_hierarchies : public std::false_type {}; - - template - struct is_hierarchies> : public std::true_type {}; - - template - struct level { - enum { value = Level }; - }; - - - template - struct get_level; - - template - struct get_level> { - enum { value = L }; - }; - - template - struct get_level : public get_level {}; - template - struct get_level : public get_level {}; - template - struct get_level : public get_level {}; - - template - using plain_type = typename std::remove_cv::type>::type; - - - template - void sumPrefixes(utils::Table& list) { - Element counter = 0; - for(auto& cur : list) { - auto tmp = cur; - cur = counter; - counter += tmp; - } - } - - - template - class NodeSet { - - using LevelData = 
utils::StaticMap,std::size_t>; - - using DataStore = std::array; - - static_assert(std::is_trivial::value, "The implementation assumes that this type is trivial!"); - - DataStore data; - - public: - - NodeSet() { - for(auto& cur : data) cur = LevelData(0); - } - - NodeSet(const NodeSet&) = default; - NodeSet(NodeSet&& other) = default; - - NodeSet& operator=(const NodeSet&) =default; - NodeSet& operator=(NodeSet&&) =default; - - - // -- observers and mutators -- - - template - NodeRef create() { - auto& node_counter = getNodeCounter(); - return NodeRef(node_counter++); - } - - template - NodeRange create(std::size_t num) { - auto& node_counter = getNodeCounter(); - NodeRef begin((node_index_t)node_counter); - node_counter += num; - NodeRef end((node_index_t)node_counter); - return { begin, end }; - } - - template - std::size_t getNumNodes() const { - return getNodeCounter(); - } - - // -- IO support -- - - void store(std::ostream& out) const { - // store the number of nodes - utils::write(out, data); - } - - static NodeSet load(std::istream& in) { - - // produce result - NodeSet res; - - // restore the number of nodes - res.data = utils::read(in); - - // done - return res; - } - - static NodeSet interpret(utils::RawBuffer& buffer) { - - // produce result - NodeSet res; - - // restore the number of nodes - res.data = buffer.consume(); - - // done - return res; - - } - - private: - - template - std::size_t& getNodeCounter() { - return data[Level].template get(); - } - - template - const std::size_t& getNodeCounter() const { - return data[Level].template get(); - } - }; - - - template - class EdgeSet { - - // -- the data stored per relation -- - class Relation { - - static_assert( - sizeof(NodeRef) == sizeof(NodeID), - "For this implementation to be correct node references have to be simple node IDs." 
- ); - - utils::Table forward_offsets; - utils::Table forward_targets; - - utils::Table backward_offsets; - utils::Table backward_targets; - - std::vector> edges; - - public: - - template - NodeList getSinks(const NodeRef& src) const { - using List = NodeList; - using TrgNodeRef = NodeRef; - assert_true(isClosed()) << "Accessing non-closed edge set!"; - if (src.id+1 >= forward_offsets.size() || forward_targets.empty()) return List{nullptr,nullptr}; - return List{ - reinterpret_cast(&forward_targets[forward_offsets[src.id]]), - reinterpret_cast(&forward_targets[forward_offsets[src.id+1]]) - }; - } - - template - NodeList getSources(const NodeRef& src) const { - using List = NodeList; - using SrcNodeRef = NodeRef; - assert_true(isClosed()) << "Accessing non-closed edge set!"; - if (src.id+1 >= backward_offsets.size() || backward_targets.empty()) return List{nullptr,nullptr}; - return List{ - reinterpret_cast(&backward_targets[backward_offsets[src.id]]), - reinterpret_cast(&backward_targets[backward_offsets[src.id+1]]) - }; - } - - void addEdge(NodeID from, NodeID to) { - edges.push_back({from,to}); - } - - bool isClosed() const { - return edges.empty(); - } - - void close() { - - // get maximum source and target - std::size_t maxSourceID = 0; - std::size_t maxTargetID = 0; - for(const auto& cur : edges) { - maxSourceID = std::max(maxSourceID,cur.first); - maxTargetID = std::max(maxTargetID,cur.second); - } - - // init forward / backward vectors - forward_offsets = utils::Table(maxSourceID + 2, 0); - forward_targets = utils::Table(edges.size()); - - backward_offsets = utils::Table(maxTargetID + 2,0); - backward_targets = utils::Table(edges.size()); - - // count number of sources / sinks - for(const auto& cur : edges) { - ++forward_offsets[cur.first]; - ++backward_offsets[cur.second]; - } - - // compute prefix sums - sumPrefixes(forward_offsets); - sumPrefixes(backward_offsets); - - // fill in targets - auto forward_pos = forward_offsets; - auto backward_pos = 
backward_offsets; - for(const auto& cur : edges) { - forward_targets[forward_pos[cur.first]++] = cur.second; - backward_targets[backward_pos[cur.second]++] = cur.first; - } - - // clear edges - edges.clear(); - - } - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // write forward edge data - forward_offsets.store(out); - forward_targets.store(out); - - // write backward edge data - backward_offsets.store(out); - backward_targets.store(out); - - } - - static Relation load(std::istream& in) { - - Relation res; - - // restore edge data - res.forward_offsets = utils::Table::load(in); - res.forward_targets = utils::Table::load(in); - - res.backward_offsets = utils::Table::load(in); - res.backward_targets = utils::Table::load(in); - - // done - return res; - } - - static Relation interpret(utils::RawBuffer& buffer) { - - Relation res; - - // restore edge data - res.forward_offsets = utils::Table::interpret(buffer); - res.forward_targets = utils::Table::interpret(buffer); - - res.backward_offsets = utils::Table::interpret(buffer); - res.backward_targets = utils::Table::interpret(buffer); - - // done - return res; - } - - }; - - using LevelData = utils::StaticMap,Relation>; - - using EdgeData = std::array; - - EdgeData data; - - public: - - EdgeSet() = default; - EdgeSet(const EdgeSet&) = default; - EdgeSet(EdgeSet&& other) = default; - - EdgeSet& operator=(const EdgeSet&) = delete; - EdgeSet& operator=(EdgeSet&&) = default; - - - template - void addEdge(const NodeRef& src, const NodeRef& trg) { - getEdgeRelation().addEdge(src,trg); - } - - void close() { - // for all levels - for(auto& level : data) { - // for all edge kinds - for(auto& rel : level) { - rel.close(); - } - } - } - - bool isClosed() const { - // for all levels - for(const auto& level : data) { - // for all edge kinds - for(const auto& rel : level) { - // check this instance - if (!rel.isClosed()) return false; - } - } - // all are done - 
return true; - } - - template - NodeList getSinks(const NodeRef& src) const { - return getEdgeRelation().template getSinks(src); - } - - template - NodeList getSources(const NodeRef& src) const { - return getEdgeRelation().template getSources(src); - } - - // -- IO support -- - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // store each relation independently - for(const auto& level : data) { - for(const auto& rel : level) { - rel.store(out); - } - } - - } - - static EdgeSet load(std::istream& in) { - - EdgeSet res; - - // load each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::load(in); - } - } - - // done - return res; - } - - static EdgeSet interpret(utils::RawBuffer& buffer) { - - EdgeSet res; - - // interpret each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::interpret(buffer); - } - } - - // done - return res; - } - - private: - - template - Relation& getEdgeRelation() { - return data[Level].template get(); - } - - template - const Relation& getEdgeRelation() const { - return data[Level].template get(); - } - - }; - - - template - class HierarchySet { - - class Relation { - - // -- inefficient build structures -- - - std::vector> children; - - std::vector parents; - - // -- efficient simulation structures -- - - utils::Table parent_targets; - - utils::Table children_offsets; - utils::Table children_targets; - - public: - - void addChild(const NodeID& parent, const NodeID& child) { - // a constant for an unknown parent - static const NodeID unknownParent(std::numeric_limits::max()); - - assert_ne(parent,unknownParent) << "Unknown parent constant must not be used!"; - - // register child as a child of parent - if (parent >= children.size()) { - children.resize(parent + 1); - } - auto& list = children[parent]; - for(auto& cur : list) if (cur == child) return; - list.push_back(child); - - - 
// register parent of child - if (child >= parents.size()) { - parents.resize(child + 1,unknownParent); - } - auto& trg = parents[child]; - assert_true(trg == unknownParent || trg == parent) - << "Double-assignment of parent for child " << child << " and parent " << parent; - - // update parent - trg = parent; - } - - bool isClosed() const { - return children.empty(); - } - - void close() { - // a constant for an unknown parent - static const NodeID unknownParent(std::numeric_limits::max()); - - // get maximum index of parents - std::size_t maxParent = 0; - for(const auto& cur : parents) { - maxParent = std::max(maxParent,cur); - } - - // compute total number of parent-child links - std::size_t numParentChildLinks = 0; - for(const auto& cur : children) { - numParentChildLinks += cur.size(); - } - - // init forward / backward vectors - children_offsets = utils::Table(maxParent + 2, 0); - children_targets = utils::Table(numParentChildLinks); - - // init child offsets - std::size_t idx = 0; - std::size_t offset = 0; - for(const auto& cur : children) { - children_offsets[idx] = offset; - offset += cur.size(); - idx++; - if (idx > maxParent) break; - } - children_offsets[idx] = offset; - - // fill in targets - idx = 0; - for(const auto& cur : children) { - for(const auto& child : cur) { - children_targets[idx++] = child; - } - } - - // clear edges - children.clear(); - - // init parent target table - parent_targets = utils::Table(parents.size()); - for(std::size_t i=0; i - NodeList getChildren(const NodeRef& parent) const { - using List = NodeList; - using ChildNodeRef = NodeRef; - assert_true(isClosed()); - if (parent.id >= children_offsets.size()-1 || children_targets.empty()) return List{nullptr,nullptr}; - return List{ - reinterpret_cast(&children_targets[children_offsets[parent.id]]), - reinterpret_cast(&children_targets[children_offsets[parent.id+1]]) - }; - } - - template - NodeRef getParent(const NodeRef& child) const { - using ParentNodeRef = NodeRef; - 
assert_true(isClosed()); - assert_lt(child.id,parent_targets.size()); - return ParentNodeRef(parent_targets[child.id]); - } - - // -- IO support -- - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // write parents table - parent_targets.store(out); - - // write child lists - children_offsets.store(out); - children_targets.store(out); - } - - static Relation load(std::istream& in) { - - Relation res; - - // restore parents - res.parent_targets = utils::Table::load(in); - - res.children_offsets = utils::Table::load(in); - res.children_targets = utils::Table::load(in); - - // done - return res; - } - - static Relation interpret(utils::RawBuffer& buffer) { - - Relation res; - - // restore parents - res.parent_targets = utils::Table::interpret(buffer); - - res.children_offsets = utils::Table::interpret(buffer); - res.children_targets = utils::Table::interpret(buffer); - - // done - return res; - } - - }; - - using LevelData = utils::StaticMap,Relation>; - - using HierarchyData = std::array; - - HierarchyData data; - - public: - - template - void addChild(const NodeRef& parent, const NodeRef& child) { - getRelation().addChild(parent,child); - } - - void close() { - for(auto& level : data) { - for(auto& rel : level) { - rel.close(); - } - } - } - - bool isClosed() const { - for(const auto& level : data) { - for(const auto& rel : level) { - if (!rel.isClosed()) return false; - } - } - return true; - } - - template - NodeList getChildren(const NodeRef& parent) const { - return getRelation().template getChildren(parent); - } - - template - NodeRef getParent(const NodeRef& child) const { - return getRelation().template getParent(child); - } - - - // -- IO support -- - - void store(std::ostream& out) const { - // only allow closed sets to be stored - assert_true(isClosed()); - - // store each relation independently - for(const auto& level : data) { - for(const auto& rel : level) { - rel.store(out); - } - } - - } 
- - static HierarchySet load(std::istream& in) { - - HierarchySet res; - - // load each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::load(in); - } - } - - // done - return res; - } - - static HierarchySet interpret(utils::RawBuffer& buffer) { - - HierarchySet res; - - // interpret each relation independently - for(auto& level : res.data) { - for(auto& rel : level) { - rel = Relation::interpret(buffer); - } - } - - // done - return res; - } - - private: - - template - Relation& getRelation() { - return data[Level].template get(); - } - - template - const Relation& getRelation() const { - return data[Level].template get(); - } - - }; - - - // -- utilities for enumerating level/kind combinations -- - - template - struct KindEnumerator; - - template - struct KindEnumerator { - template - void operator()(const Body& body) const { - body(First()); - KindEnumerator()(body); - } - }; - - template<> - struct KindEnumerator<> { - template - void operator()(const Body&) const {} - }; - - - template - struct LevelEnumerator { - template - void operator()(const Body& body) const { - body(level()); - LevelEnumerator()(body); - } - }; - - template<> - struct LevelEnumerator<0> { - template - void operator()(const Body& body) const { - body(level<0>()); - } - }; - - template - struct HierarchyLevelEnumerator { - template - void operator()(const Body& body) const { - body(level()); - HierarchyLevelEnumerator()(body); - } - }; - - template<> - struct HierarchyLevelEnumerator<1> { - template - void operator()(const Body& body) const { - body(level<1>()); - } - }; - - template<> - struct HierarchyLevelEnumerator<0> { - template - void operator()(const Body&) const {} - }; - - - // -- mesh topology store -- - - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels - > - struct MeshTopologyData; - - template< - typename ... Nodes, - typename ... Edges, - typename ... 
Hierarchies, - unsigned Levels - > - struct MeshTopologyData,edges,hierarchies,Levels> { - - using NodeSetType = NodeSet; - using EdgeSetType = EdgeSet; - using HierarchySetType = HierarchySet; - - // the topological data of all the nodes, edges and hierarchy relations on all levels - NodeSetType nodeSets; - EdgeSetType edgeSets; - HierarchySetType hierarchySets; - - MeshTopologyData() = default; - MeshTopologyData(const MeshTopologyData&) = default; - MeshTopologyData(MeshTopologyData&& other) = default; - - MeshTopologyData& operator= (MeshTopologyData&& m) = default; - - template - void forAllNodeKinds(const Body& body) const { - LevelEnumerator forAllLevels; - KindEnumerator forAllKinds; - forAllLevels([&](const auto& level){ - forAllKinds([&](const auto& kind){ - body(kind,level); - }); - }); - } - - template - void forAllEdgeKinds(const Body& body) const { - LevelEnumerator forAllLevels; - KindEnumerator forAllKinds; - forAllLevels([&](const auto& level){ - forAllKinds([&](const auto& kind){ - body(kind,level); - }); - }); - } - - template - void forAllHierarchyKinds(const Body& body) const { - HierarchyLevelEnumerator forAllLevels; - KindEnumerator forAllKinds; - forAllLevels([&](const auto& level){ - forAllKinds([&](const auto& kind){ - body(kind,level); - }); - }); - } - - template - std::size_t getNumNodes() const { - return nodeSets.template getNumNodes(); - } - - void close() { - edgeSets.close(); - hierarchySets.close(); - } - - bool isClosed() const { - return edgeSets.isClosed() && hierarchySets.isClosed(); - } - - // -- IO support -- - - void store(std::ostream& out) const { - nodeSets.store(out); - edgeSets.store(out); - hierarchySets.store(out); - } - - static MeshTopologyData load(std::istream& in) { - MeshTopologyData res; - res.nodeSets = NodeSetType::load(in); - res.edgeSets = EdgeSetType::load(in); - res.hierarchySets = HierarchySetType::load(in); - return std::move(res); - } - - static MeshTopologyData interpret(utils::RawBuffer& buffer) { - 
MeshTopologyData res; - res.nodeSets = NodeSetType::interpret(buffer); - res.edgeSets = EdgeSetType::interpret(buffer); - res.hierarchySets = HierarchySetType::interpret(buffer); - return std::move(res); - } - - }; - - /** - * A common basis class for sub-tree and sub-graph references, which are both based on paths - * within a tree. - */ - template - class PathRefBase { - - protected: - - using value_t = uint32_t; - - value_t path; - value_t mask; - - PathRefBase(value_t path, value_t mask) - : path(path), mask(mask) {} - - public: - - static Derived root() { - return { 0u , 0u }; - } - - value_t getPath() const { - return path; - } - - value_t getMask() const { - return mask; - } - - value_t getDepth() const { - if (PathRefBase::mask == 0) return 0; - return sizeof(PathRefBase::mask) * 8 - utils::countLeadingZeros(PathRefBase::mask); - } - - bool isRoot() const { - return PathRefBase::mask == 0; - } - - bool isLeftChild() const { - assert_false(isRoot()); - return !isRightChild(); - } - - bool isRightChild() const { - assert_false(isRoot()); - return PathRefBase::path & (1 << (getDepth()-1)); - } - - Derived getLeftChild() const { - assert_lt(getDepth(),sizeof(PathRefBase::path)*8); - Derived res = asDerived(); - res.PathRefBase::mask = res.PathRefBase::mask | (1 << getDepth()); - return res; - } - - Derived getRightChild() const { - Derived res = getLeftChild(); - res.PathRefBase::path = res.PathRefBase::path | (1 << getDepth()); - return res; - } - - bool operator==(const Derived& other) const { - // same mask and same valid bit part - return (PathRefBase::mask == other.PathRefBase::mask) && - ((PathRefBase::path & PathRefBase::mask) == (other.PathRefBase::path & other.PathRefBase::mask)); - } - - bool operator!=(const Derived& other) const { - return !(*this == other); - } - - bool operator<(const Derived& other) const { - - auto thisMask = PathRefBase::mask; - auto thatMask = other.PathRefBase::mask; - - auto thisPath = PathRefBase::path; - auto thatPath = 
other.PathRefBase::path; - - while(true) { - - // if they are the same, we are done - if (thisMask == thatMask && thisPath == thatPath) return false; - - // check last mask bit - auto thisMbit = thisMask & 0x1; - auto thatMbit = thatMask & 0x1; - - if (thisMbit < thatMbit) return true; - if (thisMbit > thatMbit) return false; - - auto thisPbit = thisMbit & thisPath; - auto thatPbit = thatMbit & thatPath; - - if (thisPbit < thatPbit) return true; - if (thisPbit > thatPbit) return false; - - thisMask >>= 1; - thatMask >>= 1; - thisPath >>= 1; - thatPath >>= 1; - } - } - - bool operator<=(const Derived& other) const { - return *this == other || *this < other; - } - - bool operator>=(const Derived& other) const { - return !(asDerived() < other); - } - - bool operator>(const Derived& other) const { - return !(*this <= other); - } - - bool covers(const Derived& other) const { - if (getDepth() > other.getDepth()) return false; - if (PathRefBase::mask != (PathRefBase::mask & other.PathRefBase::mask)) return false; - return (PathRefBase::mask & PathRefBase::path) == (PathRefBase::mask & other.PathRefBase::path); - } - - bool tryMerge(const Derived& other) { - - if (covers(other)) return true; - - if (other.covers(asDerived())) { - *this = other; - return true; - } - - // the masks need to be identical - auto thisMask = PathRefBase::mask; - auto thatMask = other.PathRefBase::mask; - if (thisMask != thatMask) return false; - - - // the valid portion of the paths must only differe in one bit - auto thisPath = PathRefBase::path; - auto thatPath = other.PathRefBase::path; - - auto thisValid = thisPath & thisMask; - auto thatValid = thatPath & thatMask; - - auto diff = thisValid ^ thatValid; - - // if there is more than 1 bit difference, there is nothing we can do - if (utils::countOnes(diff) != 1) return false; - - // ignore this one bit in the mask - PathRefBase::mask = PathRefBase::mask & (~diff); - - // done - return true; - } - - /** - * @return true if the intersection is 
not empty; - * in this case this instance has been updated to represent the intersection - * false if the intersection is empty, the object has not been altered - */ - bool tryIntersect(const Derived& other) { - - // if the other covers this, the intersection is empty - if (other.covers(asDerived())) return true; - - // if this one is the larger one, this one gets reduced to the smaller one - if (covers(other)) { - *this = other; - return true; - } - - // make sure common constraints are identical - auto filterMask = PathRefBase::mask & other.PathRefBase::mask; - auto thisFilter = PathRefBase::path & filterMask; - auto thatFilter = other.PathRefBase::path & filterMask; - if (thisFilter != thatFilter) return false; - - // unite (disjunction!) the constraints of both sides - PathRefBase::path = (PathRefBase::path & PathRefBase::mask) | (other.PathRefBase::path & other.PathRefBase::mask); - PathRefBase::mask = PathRefBase::mask | other.PathRefBase::mask; - return true; - } - - - - template - void visitComplement(const Body& body, unsigned depth = 0) const { - - // when we reached the depth of this reference, we are done - if (getDepth() == depth) return; - - auto bitMask = (1 << depth); - - // if at this depth there is no wild card - if (PathRefBase::mask & bitMask) { - - // invert bit at this position - Derived cpy = asDerived(); - cpy.PathRefBase::path ^= bitMask; - cpy.PathRefBase::mask = cpy.PathRefBase::mask & ((bitMask << 1) - 1); - - // this is an element of the complement - body(cpy); - - // continue path - visitComplement(body,depth+1); - - return; - } - - // follow both paths, do nothing here - Derived cpy = asDerived(); - cpy.PathRefBase::mask = PathRefBase::mask | bitMask; - - // follow the 0 path - cpy.PathRefBase::path = PathRefBase::path & ~bitMask; - cpy.template visitComplement(body,depth+1); - - // follow the 1 path - cpy.PathRefBase::path = PathRefBase::path | bitMask; - cpy.template visitComplement(body,depth+1); - - } - - std::vector 
getComplement() const { - std::vector res; - visitComplement([&](const Derived& cur){ - res.push_back(cur); - }); - return res; - } - - private: - - Derived& asDerived() { - return static_cast(*this); - } - - const Derived& asDerived() const { - return static_cast(*this); - } - - }; - - - /** - * A utility to address nodes in the partition tree. - */ - class SubTreeRef : public PathRefBase { - - using super = PathRefBase; - - friend super; - - friend class SubMeshRef; - - SubTreeRef(value_t path, value_t mask) - : super(path,mask) {} - - public: - - value_t getIndex() const { - // this is reversing the path 000ZYX to 1XYZ to get the usual - // order of nodes within a embedded tree - auto res = 1; - value_t cur = path; - for(unsigned i = 0; i>= 1; - } - return res; - } - - - SubTreeRef getParent() const { - assert_false(isRoot()); - SubTreeRef res = *this; - res.PathRefBase::mask = res.PathRefBase::mask & ~(1 << (getDepth()-1)); - return res; - } - - - template - void enumerate(const Body& body) { - - if (preOrder) body(*this); - - if (getDepth() < DepthLimit) { - getLeftChild().enumerate(body); - getRightChild().enumerate(body); - } - - if (!preOrder) body(*this); - - } - - - friend std::ostream& operator<<(std::ostream& out, const SubTreeRef& ref) { - out << "r"; - auto depth = ref.getDepth(); - for(value_t i = 0; i> i) % 2); - } - return out; - } - - }; - - - /** - * A reference to a continuously stored part of a mesh. 
- */ - class SubMeshRef : public PathRefBase { - - using super = PathRefBase; - - using value_t = uint32_t; - - friend super; - - SubMeshRef(value_t path, value_t mask) - : super(path,mask) {} - - public: - - SubMeshRef(const SubTreeRef& ref) - : super(ref.path, ref.mask) {} - - SubMeshRef getMasked(unsigned pos) const { - assert_lt(pos,getDepth()); - SubMeshRef res = *this; - res.super::mask = res.super::mask & ~(1< - void scan(const Body& body) const { - - // look for last 0 in mask - unsigned zeroPos = utils::countTrailingZeros(~super::mask); - if (zeroPos >= getDepth()) { - body(SubTreeRef(super::path,super::mask)); - return; - } - - // recursive - SubMeshRef copy = getUnmasked(zeroPos); - - // set bit to 0 - copy.super::path = copy.super::path & ~( 1 << zeroPos ); - copy.scan(body); - - // set bit to 1 - copy.super::path = copy.super::path | ( 1 << zeroPos ); - copy.scan(body); - } - - - template - void scan(const PartitionTree& ptree, const Body& body) const { - scan([&](const SubTreeRef& ref){ - ptree.template getNodeRange(ref).forAll(body); - }); - } - - - friend std::ostream& operator<<(std::ostream& out, const SubMeshRef& ref) { - out << "r"; - auto depth = ref.getDepth(); - for(value_t i = 0; i> i) % 2); - } else { - out << ".*"; - } - } - return out; - } - - }; - - /** - * A union of sub mesh references. 
- */ - class MeshRegion { - - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - friend class PartitionTree; - - std::vector refs; - - MeshRegion(const SubMeshRef* begin, const SubMeshRef* end) - : refs(begin,end) {} - - public: - - MeshRegion() {} - - MeshRegion(const SubMeshRef& ref) { - refs.push_back(ref); - } - - MeshRegion(std::initializer_list meshRefs) : refs(meshRefs) { - restoreSet(); - compress(); - } - - MeshRegion(const std::vector& refs) : refs(refs) { - restoreSet(); - compress(); - } - - bool operator==(const MeshRegion& other) const { - return this == &other || refs == other.refs || (difference(*this,other).empty() && difference(other,*this).empty()); - } - - bool operator!=(const MeshRegion& other) const { - return !(*this == other); - } - - const std::vector& getSubMeshReferences() const { - return refs; - } - - bool empty() const { - return refs.empty(); - } - - bool covers(const SubMeshRef& ref) const { - // cheap: one is covering the given reference - // expensive: the union of this and the reference is the same as this - return std::any_of(refs.begin(),refs.end(),[&](const SubMeshRef& a) { - return a.covers(ref); - }) || (merge(*this,MeshRegion(ref)) == *this); - } - - bool operator<(const MeshRegion& other) const { - return refs < other.refs; - } - - static MeshRegion merge(const MeshRegion& a, const MeshRegion& b) { - MeshRegion res; - std::set_union( - a.refs.begin(), a.refs.end(), - b.refs.begin(), b.refs.end(), - std::back_inserter(res.refs) - ); - res.compress(); - return res; - } - - template - static MeshRegion merge(const MeshRegion& a, const MeshRegion& b, const Rest& ... 
rest) { - return merge(merge(a,b),rest...); - } - - static MeshRegion intersect(const MeshRegion& a, const MeshRegion& b) { - - MeshRegion res; - - // compute pairwise intersections - for(const auto& ra : a.refs) { - for(const auto& rb : b.refs) { - auto tmp = ra; - if (tmp.tryIntersect(rb)) { - res.refs.push_back(tmp); - } - } - } - - // restore set invariant - res.restoreSet(); - - // compress the set representation - res.compress(); - return res; - } - - static MeshRegion difference(const MeshRegion& a, const MeshRegion& b) { - return intersect(a,complement(b)); - } - - static MeshRegion span(const MeshRegion&, const MeshRegion&) { - std::cout << "Scan operation not yet implemented!"; - exit(1); - } - - MeshRegion complement() const { - - MeshRegion res = SubMeshRef::root(); - - // aggregate the complements of all entries - for(const auto& cur : refs) { - - // compute the complement of the current entry - MeshRegion tmp; - cur.visitComplement([&](const SubMeshRef& ref) { - tmp.refs.push_back(ref); - }); - - // restore invariant - tmp.restoreSet(); - tmp.compress(); - - // intersect current complement with running complement - res = intersect(res,tmp); - } - - // done - return res; - } - - static MeshRegion complement(const MeshRegion& region) { - return region.complement(); - } - - /** - * An operator to load an instance of this region from the given archive. - */ - static MeshRegion load(utils::ArchiveReader&) { - assert_not_implemented(); - return MeshRegion(); - } - - /** - * An operator to store an instance of this region into the given archive. 
- */ - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - // nothing so far - } - - template - void scan(const Body& body) const { - for(const auto& cur : refs) { - cur.scan(body); - } - } - - template - void scan(const PartitionTree& ptree, const Body& body) const { - for(const auto& cur : refs) { - cur.scan(ptree,body); - } - } - - - friend std::ostream& operator<<(std::ostream& out, const MeshRegion& reg) { - return out << reg.refs; - } - - private: - - void compress() { - - // check precondition - assert_true(std::is_sorted(refs.begin(),refs.end())); - - // Phase 1: remove redundant entries - removeCovered(); - - // Phase 2: collapse adjacent entries (iteratively) - while (collapseSiblings()) {} - } - - - bool removeCovered() { - - // see whether any change happend - bool changed = false; - for(std::size_t i = 0; i, - unsigned Levels = 1, - unsigned depth = 12 - > - class PartitionTree; - - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels, - unsigned depth - > - class PartitionTree { - - static_assert(detail::is_nodes::value, - "First template argument of PartitionTree must be of type nodes<...>"); - - static_assert(detail::is_edges::value, - "Second template argument of PartitionTree must be of type edges<...>"); - - static_assert(detail::is_hierarchies::value, - "Third template argument of PartitionTree must be of type hierarchies<...>"); - - }; - - template< - typename ... Nodes, - typename ... Edges, - typename ... 
Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - class PartitionTree,edges,hierarchies,Levels,PartitionDepth> { - - public: - - enum { depth = PartitionDepth }; - - private: - - // an internal construct to store node ranges - struct RangeStore { - NodeID begin; - NodeID end; - }; - - // an internal construct to store regions in open and - // closed structure - // - open: the region pointer is referencing the stored region - // - closed: the begin and end indices reference and interval of an externally maintained - // list of regions - struct RegionStore { - - // -- open -- - MeshRegion* region; // the ownership is managed by the enclosing tree - - // -- closed -- - std::size_t offset; - std::size_t length; - - RegionStore() - : region(nullptr), offset(0), length(0) {} - - MeshRegion toRegion(const SubMeshRef* references) const { - if (region) return *region; - auto start = references + offset; - auto end = start + length; - return MeshRegion(start,end); - } - - RegionStore& operator=(const MeshRegion& value) { - if (!region) region = new MeshRegion(); - *region = value; - return *this; - } - }; - - - static_assert(Levels > 0, "There must be at least one level!"); - - struct LevelInfo { - - utils::StaticMap,RangeStore> nodeRanges; - - utils::StaticMap,RegionStore> forwardClosure; - utils::StaticMap,RegionStore> backwardClosure; - - utils::StaticMap,RegionStore> parentClosure; - utils::StaticMap,RegionStore> childClosure; - - }; - - struct Node { - - std::array data; - - }; - - // some preconditions required for the implementation of this class to work - static_assert(std::is_trivially_copyable::value, "RangeStore should be trivially copyable!"); - static_assert(std::is_trivially_copyable::value, "RegionStore should be trivially copyable!"); - static_assert(std::is_trivially_copyable::value, "LevelInfo should be trivially copyable!" ); - static_assert(std::is_trivially_copyable::value, "Nodes should be trivially copyable!" 
); - static_assert(std::is_trivially_copyable::value, "SubMeshRefs should be trivially copyable!"); - - enum { num_elements = 1ul << (depth + 1) }; - - bool owned; - - Node* data; - - std::size_t numReferences; - - SubMeshRef* references; - - PartitionTree(Node* data, std::size_t numReferences, SubMeshRef* references) - : owned(false), data(data), numReferences(numReferences), references(references) { - assert_true(data); - assert_true(references); - } - - public: - - PartitionTree() : owned(true), data(new Node[num_elements]), numReferences(0), references(nullptr) {} - - ~PartitionTree() { - if (owned) { - delete [] data; - free(references); - } - } - - PartitionTree(const PartitionTree&) = delete; - - PartitionTree(PartitionTree&& other) - : owned(other.owned), - data(other.data), - numReferences(other.numReferences), - references(other.references) { - - // free other from ownership - other.owned = false; - other.data = nullptr; - other.references = nullptr; - } - - PartitionTree& operator=(const PartitionTree&) = delete; - - PartitionTree& operator=(PartitionTree&& other) { - assert_ne(this,&other); - - // swap content and ownership - std::swap(owned,other.owned); - numReferences = other.numReferences; - std::swap(data,other.data); - std::swap(references,other.references); - - // done - return *this; - } - - bool isClosed() const { - return references != nullptr; - } - - void close() { - // must not be closed for now - assert_false(isClosed()); - - // a utility to apply an operation on each mesh region - auto forEachMeshRegion = [&](const auto& op) { - for(std::size_t i=0; igetSubMeshReferences().size(); - }); - - // create reference buffer - references = static_cast(malloc(sizeof(SubMeshRef) * numReferences)); - if (!references) { - throw "Unable to allocate memory for managing references!"; - } - - // transfer ownership of SubMeshRefs to reference buffer - std::size_t offset = 0; - forEachMeshRegion([&](RegionStore& cur){ - - // check whether there is a region 
- if (!cur.region) { - cur.offset = 0; - cur.length = 0; - return; - } - - // close the region - const auto& refs = cur.region->getSubMeshReferences(); - cur.offset = offset; - cur.length = refs.size(); - for(auto& cur : refs) { - // placement new for this reference - new (&references[offset++]) SubMeshRef(cur); - } - - // delete old region - delete cur.region; - cur.region = nullptr; - }); - - // make sure counting and transferring covered the same number of references - assert_eq(numReferences, offset); - } - - template - NodeRange getNodeRange(const SubTreeRef& ref = SubTreeRef::root()) const { - assert_lt(ref.getIndex(),num_elements); - auto range = data[ref.getIndex()].data[Level].nodeRanges.template get(); - return { - NodeRef{ range.begin }, - NodeRef{ range.end } - }; - } - - template - void setNodeRange(const SubTreeRef& ref, const NodeRange& range) { - auto& locRange = getNode(ref).data[Level].nodeRanges.template get(); - locRange.begin = range.getBegin(); - locRange.end = range.getEnd(); - } - - template - MeshRegion getForwardClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].forwardClosure.template get().toRegion(references); - } - - template - void setForwardClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].forwardClosure.template get() = region; - } - - template - MeshRegion getBackwardClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].backwardClosure.template get().toRegion(references); - } - - template - void setBackwardClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].backwardClosure.template get() = region; - } - - template - MeshRegion getParentClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].parentClosure.template get().toRegion(references); - } - - template - void setParentClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].parentClosure.template get() = region; - } - - - 
template - MeshRegion getChildClosure(const SubTreeRef& ref) const { - return getNode(ref).data[Level].childClosure.template get().toRegion(references); - } - - template - void setChildClosure(const SubTreeRef& ref, const MeshRegion& region) { - getNode(ref).data[Level].childClosure.template get() = region; - } - - - template - void visitPreOrder(const Body& body) { - SubTreeRef::root().enumerate(body); - } - - template - void visitPostOrder(const Body& body) { - SubTreeRef::root().enumerate(body); - } - - // -- serialization support for network transferes -- - - void store(utils::ArchiveWriter&) const { - assert_not_implemented(); - } - - static PartitionTree load(utils::ArchiveReader&) { - assert_not_implemented(); - return PartitionTree(); - } - - // -- load / store for files -- - - void store(std::ostream& out) const { - - // start by writing out number of references - out.write(reinterpret_cast(&numReferences),sizeof(numReferences)); - - // continue with node information - out.write(reinterpret_cast(data),sizeof(Node)*num_elements); - - // and end with references - out.write(reinterpret_cast(references),sizeof(SubMeshRef)*numReferences); - - } - - static PartitionTree load(std::istream& in) { - - // create the resulting tree (owning all its data) - PartitionTree res; - - // read in number of references - in.read(reinterpret_cast(&res.numReferences),sizeof(res.numReferences)); - - // load nodes - in.read(reinterpret_cast(res.data),sizeof(Node)*num_elements); - - // load references - res.references = reinterpret_cast(malloc(sizeof(SubMeshRef)*res.numReferences)); - in.read(reinterpret_cast(res.references),sizeof(SubMeshRef)*res.numReferences); - - // done - return res; - } - - static PartitionTree interpret(utils::RawBuffer& raw) { - - // get size - std::size_t numReferences = raw.consume(); - - // get nodes - Node* nodes = raw.consumeArray(num_elements); - - // get references - SubMeshRef* references = raw.consumeArray(numReferences); - - // wrap up results - 
return PartitionTree(nodes,numReferences,references); - } - - - private: - - const Node& getNode(const SubTreeRef& ref) const { - assert_lt(ref.getIndex(),num_elements); - return data[ref.getIndex()]; - } - - Node& getNode(const SubTreeRef& ref) { - assert_lt(ref.getIndex(),num_elements); - return data[ref.getIndex()]; - } - - }; - - - class NaiveMeshPartitioner { - - public: - - template< - unsigned PartitionDepth, - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels - > - PartitionTree partition(const MeshTopologyData& data) const { - - // create empty partition tree - PartitionTree res; - - // set up node ranges for partitions - data.forAllNodeKinds([&](const auto& nodeKind, const auto& level) { - - // get node kind and level - using NodeKind = plain_type; - // not directly accessing lvl::value here, as MSVC 15 refuses to acknowledge its constexpr-ness - using lvl = get_level; - - // set root node to cover the full range - auto num_nodes = data.template getNumNodes(); - res.template setNodeRange( - SubTreeRef::root(), - NodeRange( - NodeRef{ 0 }, - NodeRef{ NodeID((node_index_t)num_nodes) } - ) - ); - - // recursively sub-divide ranges - res.visitPreOrder([&](const SubTreeRef& ref) { - - if (ref.isRoot()) return; - - // get the range of the parent - auto range = res.template getNodeRange(ref.getParent()); - - // extract begin / end - auto begin = range.getBegin(); - auto end = range.getEnd(); - - // compute mid - auto mid = NodeRef(begin.id + (end.id - begin.id) / 2); - - // get range for this node - if (ref.isLeftChild()) { - range = NodeRange(begin,mid); - } else { - range = NodeRange(mid,end); - } - - // update the range - res.template setNodeRange(ref,range); - - }); - - }); - - // set up closures for edges - data.forAllEdgeKinds([&](const auto& edgeKind, const auto& level) { - - // get edge kind and level - using EdgeKind = plain_type; - // not directly accessing lvl::value here, as MSVC 15 refuses to acknowledge its constexpr-ness 
- using lvl = get_level; - - // the closure is everything for now - MeshRegion closure = SubMeshRef::root(); - - // initialize all the closured with the full region - res.visitPreOrder([&](const SubTreeRef& ref) { - // fix forward closure - res.template setForwardClosure(ref,closure); - - // fix backward closure - res.template setBackwardClosure(ref,closure); - }); - - }); - - - // set up closures for hierarchies - data.forAllHierarchyKinds([&](const auto& hierarchyKind, const auto& level) { - - // get hierarchy kind and level - using HierarchyKind = plain_type; - // not directly accessing lvl::value here, as MSVC 15 refuses to acknowledge its constexpr-ness - using lvl = get_level; - - // make sure this is not called for level 0 - assert_gt(lvl::value,0) << "There should not be any hierarchies on level 0."; - - // the closure is everything for now - MeshRegion closure = SubMeshRef::root(); - - // initialize all the closured with the full region - res.visitPreOrder([&](const SubTreeRef& ref) { - - // fix parent closure - res.template setParentClosure(ref,closure); - - // fix child closure - res.template setChildClosure(ref,closure); - }); - - }); - - // close the data representation - res.close(); - - // done - return res; - } - - }; - - - template< - typename NodeKind, - typename ElementType, - unsigned Level, - typename PartitionTree - > - class MeshDataFragment { - public: - - using facade_type = MeshData; - using region_type = MeshRegion; - using shared_data_type = PartitionTree; - - private: - - using partition_tree_type = PartitionTree; - - const partition_tree_type& partitionTree; - - region_type coveredRegion; - - std::vector data; - - friend facade_type; - - public: - - MeshDataFragment() = delete; - - MeshDataFragment(const partition_tree_type& ptree, const region_type& region) - : partitionTree(ptree), coveredRegion(region) { - - // get upper boundary of covered node ranges - std::size_t max = 0; - region.scan([&](const SubTreeRef& cur){ - max = 
std::max(max,ptree.template getNodeRange(cur).getEnd().id); - }); - - // resize data storage - data.resize(max); - - } - - private: - - MeshDataFragment(const partition_tree_type& ptree, std::vector&& data) - : partitionTree(ptree), coveredRegion(SubMeshRef::root()), data(std::move(data)) {} - - public: - - MeshDataFragment(const MeshDataFragment&) = delete; - MeshDataFragment(MeshDataFragment&&) = default; - - MeshDataFragment& operator=(const MeshDataFragment&) = delete; - MeshDataFragment& operator=(MeshDataFragment&&) = default; - - - facade_type mask() { - return facade_type(*this); - } - - const region_type& getCoveredRegion() const { - return coveredRegion; - } - - const ElementType& operator[](const NodeRef& id) const { - return data[id.getOrdinal()]; - } - - ElementType& operator[](const NodeRef& id) { - return data[id.getOrdinal()]; - } - - std::size_t size() const { - return data.size(); - } - - void resize(const region_type&) { - - } - - void insert(const MeshDataFragment& other, const region_type& area) { - assert_true(core::isSubRegion(area,other.coveredRegion)) << "New data " << area << " not covered by source of size " << coveredRegion << "\n"; - assert_true(core::isSubRegion(area,coveredRegion)) << "New data " << area << " not covered by target of size " << coveredRegion << "\n"; - - assert_not_implemented(); - std::cout << core::isSubRegion(area,other.coveredRegion); - -// // copy data line by line using memcpy -// area.scanByLines([&](const point& a, const point& b){ -// auto start = flatten(a); -// auto length = (flatten(b) - start) * sizeof(T); -// std::memcpy(&data[start],&other.data[start],length); -// }); - } - - void extract(utils::ArchiveWriter&, const region_type&) const { - assert_not_implemented(); - } - - void insert(utils::ArchiveReader&) { - assert_not_implemented(); - } - - - // -- load / store for files -- - - void store(std::ostream& out) const { - - // check that the element type is a trivial type - 
assert_true(std::is_trivial::value) - << "Sorry, only trivial types may be stored through this infrastructure."; - - // this fragment is required to cover the entire mesh - assert_eq(coveredRegion, SubMeshRef::root()); - - // write covered data to output stream - utils::write(out,data.size()); - utils::write(out,data.begin(),data.end()); - } - - static MeshDataFragment load(const partition_tree_type& ptree, std::istream& in) { - // restore the data buffer - std::size_t size = utils::read(in); - std::vector data(size); - utils::read(in,data.begin(),data.end()); - - // create the data fragment - return MeshDataFragment(ptree,std::move(data)); - } - - static MeshDataFragment interpret(const partition_tree_type& ptree, utils::RawBuffer& raw) { - - // TODO: when exchanging the vector by some manageable structure, replace this - // For now: we copy the data - - // copy the data buffer - std::size_t size = raw.consume(); - auto start = raw.consumeArray(size); - std::vector data(start, start + size); - - // create the data fragment - return MeshDataFragment(ptree,std::move(data)); - } - - }; - - - /** - * An entity to reference the full range of a scan. This token - * can not be copied and will wait for the completion of the scan upon destruction. 
- */ - class scan_reference { - - core::treeture handle; - - public: - - scan_reference(core::treeture&& handle) - : handle(std::move(handle)) {} - - scan_reference() {}; - scan_reference(const scan_reference&) = delete; - scan_reference(scan_reference&&) = default; - - scan_reference& operator=(const scan_reference&) = delete; - scan_reference& operator=(scan_reference&&) = default; - - ~scan_reference() { handle.wait(); } - - void wait() const { handle.wait(); } - - }; - - } // end namespace detail - - template< - typename NodeKind, - typename ElementType, - unsigned Level, - typename PartitionTree - > - class MeshData : public core::data_item> { - - template - friend class Mesh; - - public: - - using node_kind = NodeKind; - - using element_type = ElementType; - - using fragment_type = detail::MeshDataFragment; - - private: - - std::unique_ptr owned; - - fragment_type* data; - - - friend fragment_type; - - MeshData(fragment_type& data) : data(&data) {} - - MeshData(std::unique_ptr&& data) : owned(std::move(data)), data(owned.get()) {} - - MeshData(const PartitionTree& ptree, const detail::MeshRegion& region) - : owned(std::make_unique(ptree,region)), data(owned.get()) {} - - public: - - const ElementType& operator[](const NodeRef& id) const { - return (*data)[id]; - } - - ElementType& operator[](const NodeRef& id) { - return (*data)[id]; - } - - std::size_t size() const { - return (*data).size(); - } - - - void store(std::ostream& out) const { - // ensure that the data is owned - assert_true(owned) << "Only supported when data is owned (not managed by some Data Item Manager)"; - owned->store(out); - } - - static MeshData load(const PartitionTree& ptree, std::istream& in) { - return std::make_unique(fragment_type::load(ptree,in)); - } - - static MeshData interpret(const PartitionTree& ptree, utils::RawBuffer& raw) { - return std::make_unique(fragment_type::interpret(ptree,raw)); - } - }; - - - /** - * The default implementation of a mesh is capturing all 
ill-formed parameterizations - * of the mesh type to provide cleaner compiler errors. - */ - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - class Mesh { - - static_assert(detail::is_nodes::value, - "First template argument of Mesh must be of type nodes<...>"); - - static_assert(detail::is_edges::value, - "Second template argument of Mesh must be of type edges<...>"); - - static_assert(detail::is_hierarchies::value, - "Third template argument of Mesh must be of type hierarchies<...>"); - - }; - - - /** - * The type for representing the topological information of a hierarchical mesh. - */ - template< - typename ... NodeKinds, - typename ... EdgeKinds, - typename ... Hierarchies, - unsigned Levels, - unsigned PartitionDepth - > - class Mesh,edges,hierarchies,Levels,PartitionDepth> { - - static_assert(Levels > 0, "There must be at least one level!"); - - public: - - using topology_type = detail::MeshTopologyData,edges,hierarchies,Levels>; - - using partition_tree_type = detail::PartitionTree,edges,hierarchies,Levels,PartitionDepth>; - - template - using mesh_data_type = MeshData; - - using builder_type = MeshBuilder,edges,hierarchies,Levels>; - - friend builder_type; - - enum { levels = Levels }; - - private: - - partition_tree_type partitionTree; - - topology_type data; - - Mesh(topology_type&& data, partition_tree_type&& partitionTree) - : partitionTree(std::move(partitionTree)), data(std::move(data)) { - assert_true(data.isClosed()); - } - - public: - - // -- ctors / dtors / assignments -- - - Mesh(const Mesh&) = delete; - Mesh(Mesh&&) = default; - - Mesh& operator=(const Mesh&) = delete; - Mesh& operator=(Mesh&&) = default; - - - // -- provide access to components -- - - const topology_type& getTopologyData() const { - return data; - } - - const partition_tree_type& getPartitionTree() const { - return partitionTree; - } - - // -- mesh querying -- - - template - std::size_t getNumNodes() const 
{ - return data.template getNumNodes(); - } - - // -- mesh interactions -- - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - NodeList getSinks(const NodeRef& a) const { - return data.edgeSets.template getSinks(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - NodeRef getSink(const NodeRef& a) const { - const auto& list = getSinks(a); - assert_eq(list.size(),1); - return list.front(); - } - - template< - typename EdgeKind, - typename B, - unsigned Level, - typename A = typename EdgeKind::src_node_kind - > - NodeList getSources(const NodeRef& b) const { - return data.edgeSets.template getSources(b); - } - - template< - typename EdgeKind, - typename B, - unsigned Level, - typename A = typename EdgeKind::src_node_kind - > - NodeRef getSource(const NodeRef& b) const { - const auto& list = getSources(b); - assert_eq(list.size(),1); - return list.front(); - } - - // -- overloading of getNeighbor convenience functions (aliases of getSink / getSource ) -- - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - std::enable_if_t::value,NodeRef> - getNeighbor(const NodeRef& a) const { - return getSink(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::trg_node_kind - > - std::enable_if_t::value,NodeList> - getNeighbors(const NodeRef& a) const { - return getSinks(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::src_node_kind - > - std::enable_if_t::value,NodeRef> - getNeighbor(const NodeRef& a) const { - return getSource(a); - } - - template< - typename EdgeKind, - typename A, - unsigned Level, - typename B = typename EdgeKind::src_node_kind - > - std::enable_if_t::value,NodeList> - getNeighbors(const NodeRef& a) const { - return getSources(a); - } - 
- // -- parent / children relation -- - - template< - typename Hierarchy, - typename A, unsigned Level, - typename B = typename Hierarchy::parent_node_kind - > - NodeRef getParent(const NodeRef& a) const { - return data.hierarchySets.template getParent(a); - } - - template< - typename Hierarchy, - typename A, unsigned Level, - typename B = typename Hierarchy::child_node_kind - > - NodeList getChildren(const NodeRef& a) const { - return data.hierarchySets.template getChildren(a); - } - - /** - * A sequential operation calling the given body for each node of the given kind - * on the given level in parallel. - * - * NOTE: this operation is processed sequentially, and can thus not be distributed - * among multiple nodes. Use pforAll instead - * - * @tparam Kind the kind of node to be visited - * @tparam Level the level of the mesh to be addressed - * @tparam Body the type of operation to be applied on each node - * - * @param body the operation to be applied on each node of the selected kind and level - * @return a scan reference for synchronizing upon the asynchronously processed operation - */ - template - void forAll(const Body& body) const { - // iterate over all selected elements - for(const auto& cur : partitionTree.template getNodeRange(detail::SubTreeRef::root())) { - body(cur); - } - } - - /** - * A parallel operation calling the given body for each node of the given kind - * on the given level in parallel. - * - * This is the main operator for iterating over nodes within a mesh. All visits - * will always be conducted in parallel. 
- * - * @tparam Kind the kind of node to be visited - * @tparam Level the level of the mesh to be addressed - * @tparam Body the type of operation to be applied on each node - * - * @param body the operation to be applied on each node of the selected kind and level - * @return a scan reference for synchronizing upon the asynchronously processed operation - */ - template - detail::scan_reference pforAll(const Body& body) const { - - using range = detail::SubTreeRef; - - return core::prec( - // -- base case test -- - [](const range& a){ - // when we reached a leaf, we are at the bottom - return a.getDepth() == PartitionDepth; - }, - // -- base case -- - [&](const range& a){ - // apply the body to the elements of the current range - for(const auto& cur : partitionTree.template getNodeRange(a)) { - body(cur); - } - }, - // -- step case -- - core::pick( - // -- split -- - [](const range& a, const auto& rec){ - return core::parallel( - rec(a.getLeftChild()), - rec(a.getRightChild()) - ); - }, - // -- serialized step case (optimization) -- - [&](const range& a, const auto&){ - // apply the body to the elements of the current range - for(const auto& cur : partitionTree.template getNodeRange(a)) { - body(cur); - } - } - ) - )(detail::SubTreeRef::root()); - } - - template - typename utils::lambda_traits::result_type preduce( - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init, - const ReduceLocalState& exit) const { - typedef typename utils::lambda_traits::result_type res_type; - - using range = detail::SubTreeRef; - - auto handle = [](const InitLocalState& init, const MapOp& map, const ReduceLocalState& exit, const range& a, - const partition_tree_type& partitionTree)->res_type { - auto res = init(); - auto mapB = [map,&res](const auto& cur) { - return map(cur,res); - }; - for(const auto& cur : partitionTree.template getNodeRange(a)) { - mapB(cur); - } - return exit(res); - }; - - - // implements a binary splitting policy for iterating over the given 
iterator range - return core::prec( - [](const range& a) { - return a.getDepth() == PartitionDepth; - }, - [&](const range& a)->res_type { - return handle(init, map, exit, a, partitionTree); - }, - core::pick( - [reduce](const range& a, const auto& nested) { - // here we have the splitting - auto left = a.getLeftChild(); - auto right = a.getRightChild(); - -// return user::add(nested(left), nested(right)); - return core::combine(std::move(nested(left)),std::move(nested(right)),reduce); - }, - [&](const range& a, const auto&)->res_type { - return handle(init, map, exit, a, partitionTree); - } - ) - )(detail::SubTreeRef::root()).get(); - } - - template - typename utils::lambda_traits::result_type preduce( - const MapOp& map, - const ReduceOp& reduce, - const InitLocalState& init) const { - return preduce(map, reduce, init, [](typename utils::lambda_traits::result_type a) { return a; }); - } - - template - typename utils::lambda_traits::result_type preduce( - const MapOp& map, - const ReduceOp& reduce) const { - typedef typename utils::lambda_traits::result_type res_type; - - return preduce(map, reduce, [](){ return res_type(); }, [](res_type a) { return a; }); - } - - // -- mesh data -- - - template - MeshData createNodeData() const { - return MeshData(partitionTree,detail::SubMeshRef::root()); - } - - template - std::array, N> createNodeDataArray() const { - return utils::build_array([&] { return MeshData(partitionTree,detail::SubMeshRef::root()); } ); - } - - template - MeshData loadNodeData(std::istream& in) const { - return MeshData::load(partitionTree,in); - } - - template - MeshData interpretNodeData(utils::RawBuffer& raw) const { - return MeshData::interpret(partitionTree,raw); - } - - - // -- mesh property handling -- - - template - MeshProperties createProperties() const { - return MeshProperties(*this); - } - - template - MeshProperties loadProperties(std::istream& in) const { - return MeshProperties::load(*this,in); - } - - template - MeshProperties 
interpretProperties(utils::RawBuffer& raw) const { - return MeshProperties::interpret(*this,raw); - } - - // -- load / store for files -- - - void store(std::ostream& out) const { - - // write partition tree - partitionTree.store(out); - - // write topological data - data.store(out); - - } - - static Mesh load(std::istream& in) { - - // interpret the partition tree - auto partitionTree = partition_tree_type::load(in); - - // load topological data - auto topologyData = topology_type::load(in); - - // create result - return Mesh( - std::move(topologyData), - std::move(partitionTree) - ); - - } - - static Mesh interpret(utils::RawBuffer& raw) { - - // interpret the partition tree - auto partitionTree = partition_tree_type::interpret(raw); - - // load topological data - auto topologyData = topology_type::interpret(raw); - - // create result - return Mesh( - std::move(topologyData), - std::move(partitionTree) - ); - - } - - }; - - - - /** - * The default implementation of a mesh build is capturing all ill-formed parameterizations - * of the mesh builder type to provide cleaner compiler errors. - */ - template< - typename Nodes, - typename Edges, - typename Hierarchies, - unsigned layers - > - class MeshBuilder { - - static_assert(detail::is_nodes::value, - "First template argument of MeshBuilder must be of type nodes<...>"); - - static_assert(detail::is_edges::value, - "Second template argument of MeshBuilder must be of type edges<...>"); - - static_assert(detail::is_hierarchies::value, - "Third template argument of MeshBuilder must be of type hierarchies<...>"); - - }; - - /** - * A utility to construct meshes. - */ - template< - typename ... NodeKinds, - typename ... EdgeKinds, - typename ... 
Hierarchies, - unsigned Levels - > - class MeshBuilder,edges,hierarchies,Levels> { - - static_assert(Levels > 0, "There must be at least one level!"); - - public: - - template - using mesh_type = Mesh,edges,hierarchies,Levels,PartitionDepth>; - - using topology_type = detail::MeshTopologyData,edges,hierarchies,Levels>; - - private: - - topology_type data; - - public: - - // -- mesh modeling -- - - template - NodeRef create() { - // TODO: check that Kind is a valid node kind - static_assert(Level < Levels, "Trying to create a node on invalid level."); - return data.nodeSets.template create(); - } - - template - NodeRange create(unsigned num) { - // TODO: check that Kind is a valid node kind - static_assert(Level < Levels, "Trying to create a node on invalid level."); - return data.nodeSets.template create(num); - } - - template - void link(const NodeRef& a, const NodeRef& b) { - // TODO: check that EdgeKind is a valid edge kind - static_assert(Level < Levels, "Trying to create an edge on invalid level."); - static_assert(std::is_same::value, "Invalid source node type"); - static_assert(std::is_same::value, "Invalid target node type"); - return data.edgeSets.template addEdge(a,b); - } - - template - void link(const NodeRef& parent, const NodeRef& child) { - // TODO: check that HierarchyKind is a valid hierarchy kind - static_assert(LevelA == LevelB+1, "Can not connect nodes of non-adjacent levels in hierarchies"); - static_assert(LevelA < Levels, "Trying to create a hierarchical edge to an invalid level."); - static_assert(std::is_same::value, "Invalid source node type"); - static_assert(std::is_same::value, "Invalid target node type"); - return data.hierarchySets.template addChild(parent,child); - } - - // -- build mesh -- - - template - mesh_type build(const Partitioner& partitioner) const & { - - // close the topological data - topology_type meshData = data; - meshData.close(); - - // partition the mesh - auto partitionTree = partitioner.template 
partition(meshData); - - return mesh_type(std::move(meshData), std::move(partitionTree)); - } - - template - mesh_type build() const & { - return build(detail::NaiveMeshPartitioner()); - } - - - template - mesh_type build(const Partitioner& partitioner) && { - - // partition the mesh - auto partitionTree = partitioner.template partition(data); - - return mesh_type(std::move(data), std::move(partitionTree)); - } - - template - mesh_type build() const && { - return std::move(*this).template build(detail::NaiveMeshPartitioner()); - } - - }; - - - // -- Mesh Property Collections -------------------------------------- - - - // TODO: reduce the template instantiations complexity of this code. - - namespace detail { - - template - class MeshPropertiesData { - - using property_list = utils::type_list; - - template - using mesh_data_type = MeshData; - - using data_t = std::tuple...>; - - data_t data; - - MeshPropertiesData(data_t&& data) : data(std::move(data)) {} - - public: - - template - MeshPropertiesData(const Mesh& mesh) - : data(mesh.template createNodeData()...) 
{} - - template - mesh_data_type& get() { - return std::get::value>(data); - } - - template - const mesh_data_type& get() const { - return std::get::value>(data); - } - - void store(std::ostream& out) const { - // write property data - utils::forEach(data,[&](const auto& entry){ - entry.store(out); - }); - } - - template - static MeshPropertiesData load(const Mesh& mesh, std::istream& in) { - // a temporary tuple type to be filled with temporary results - using tmp_data_type = std::tuple>...>; - - // load property data - tmp_data_type data; - utils::forEach(data,[&](auto& entry){ - // load data - using data_type = typename std::remove_reference_t::element_type; - using node_kind = typename data_type::node_kind; - using value_type = typename data_type::element_type; - entry = std::make_unique(mesh.template loadNodeData(in)); - }); - - // move data to tuple - return MeshPropertiesData(utils::map(data,[&](auto& entry){ - return std::move(*entry.get()); - })); - } - - template - static MeshPropertiesData interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // a temporary tuple type to be filled with temporary results - using tmp_data_type = std::tuple>...>; - - // load property data - tmp_data_type data; - utils::forEach(data,[&](auto& entry){ - // load data - using data_type = typename std::remove_reference_t::element_type; - using node_kind = typename data_type::node_kind; - using value_type = typename data_type::element_type; - entry = std::make_unique(mesh.template interpretNodeData(raw)); - }); - - // move data to tuple - return MeshPropertiesData(utils::map(data,[&](auto& entry){ - return std::move(*entry.get()); - })); - } - - }; - - template - class MeshPropertiesLevels { - - template - using level_data = MeshPropertiesData; - - using nested_level_type = MeshPropertiesLevels; - - level_data data; - - nested_level_type nested; - - MeshPropertiesLevels(level_data&& data, nested_level_type&& nested) - : data(std::move(data)), nested(std::move(nested)) {} - - 
public: - - template - MeshPropertiesLevels(const Mesh& mesh) - : data(mesh), nested(mesh) {} - - template - std::enable_if_t>& - get() { - return data; - } - - template - const std::enable_if_t>& - get() const { - return data; - } - - template - std::enable_if_t>& - get() { - return nested.template get(); - } - - template - const std::enable_if_t>& - get() const { - return nested.template get(); - } - - void store(std::ostream& out) const { - // write property data - data.store(out); - // write nested data - nested.store(out); - } - - - template - static MeshPropertiesLevels load(const Mesh& mesh, std::istream& in) { - // load property data - auto data = level_data::load(mesh,in); - // load nested data - auto nested = nested_level_type::load(mesh,in); - // build level data - return MeshPropertiesLevels(std::move(data),std::move(nested)); - } - - template - static MeshPropertiesLevels interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // interpret property data - auto data = level_data::interpret(mesh,raw); - // interpret nested data - auto nested = nested_level_type::interpret(mesh,raw); - // build level data - return MeshPropertiesLevels(std::move(data),std::move(nested)); - } - - }; - - - template - class MeshPropertiesLevels { - - using level_data = MeshPropertiesData; - - level_data data; - - MeshPropertiesLevels(level_data&& data) : data(std::move(data)) {} - - public: - - template - MeshPropertiesLevels(const Mesh& mesh) - : data(mesh) {} - - template - std::enable_if_t& - get() { - return data; - } - - template - const std::enable_if_t& - get() const { - return data; - } - - void store(std::ostream& out) const { - // write property data - data.store(out); - } - - template - static MeshPropertiesLevels load(const Mesh& mesh, std::istream& in) { - // load property data - return level_data::load(mesh,in); - } - - template - static MeshPropertiesLevels interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // interpret property data - return 
level_data::interpret(mesh,raw); - } - - }; - - } - - template - class MeshProperties { - - template - friend class Mesh; - - using DataStore = detail::MeshPropertiesLevels; - - DataStore data; - - template - MeshProperties(const Mesh& mesh) : data(mesh) {} - - MeshProperties(DataStore&& data) : data(std::move(data)) {} - - public: - - template - MeshData& - get() { - return data.template get().template get(); - } - - template - const MeshData& - get() const { - return data.template get().template get(); - } - - template - typename Property::value_type& get(const NodeRef& node) { - return get()[node]; - } - - template - const typename Property::value_type& get(const NodeRef& node) const { - return get()[node]; - } - - // -- load / store for files -- - - void store(std::ostream& out) const { - // write property data - data.store(out); - } - - template - static MeshProperties load(const Mesh& mesh, std::istream& in) { - // forward call to data store - return MeshProperties(DataStore::load(mesh,in)); - } - - template - static MeshProperties interpret(const Mesh& mesh, utils::RawBuffer& raw) { - // forward call to data store - return MeshProperties(DataStore::interpret(mesh,raw)); - } - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/scalar.h b/vendor/allscale/api/user/data/scalar.h deleted file mode 100644 index a8bcee806..000000000 --- a/vendor/allscale/api/user/data/scalar.h +++ /dev/null @@ -1,216 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/api/core/data.h" - -#include "allscale/utils/assert.h" -#include "allscale/utils/printer/join.h" -#include "allscale/utils/large_array.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - // --------------------------------------------------------- - // Declarations - // --------------------------------------------------------- - - - /** - 
* A data item wrapper for scalar values. - */ - template - class Scalar; - - - // --------------------------------------------------------- - // Definitions - // --------------------------------------------------------- - - - namespace detail { - - /** - * The type utilized to address regions of scalar data items. The region - * defines the unit region of either being present or not. - */ - class ScalarRegion { - - // indicating whether the value is present or not - bool flag; - - public: - - ScalarRegion() = default; - - ScalarRegion(bool value) : flag(value) {} - - bool operator==(const ScalarRegion& other) const { - return flag == other.flag; - } - - bool operator!=(const ScalarRegion& other) const { - return flag != other.flag; - } - - /** - * The empty check returns true if the value is not present. - */ - bool empty() const { - return !flag; - } - - static ScalarRegion merge(const ScalarRegion& a, const ScalarRegion& b) { - return { a.flag || b.flag }; - } - - static ScalarRegion intersect(const ScalarRegion& a, const ScalarRegion& b) { - return { a.flag && b.flag }; - } - - static ScalarRegion difference(const ScalarRegion& a, const ScalarRegion& b) { - return a.flag && !b.flag; - } - - static ScalarRegion span(const ScalarRegion& a, const ScalarRegion& b) { - return merge(a,b); - } - - /** - * An operator to load an instance of this range from the given archive. - */ - static ScalarRegion load(utils::ArchiveReader& reader) { - return reader.read(); - } - - /** - * An operator to store an instance of this range into the given archive. - */ - void store(utils::ArchiveWriter& writer) const { - writer.write(flag); - } - - friend std::ostream& operator<<(std::ostream& out, const ScalarRegion& region) { - return out << (region.flag ? "+" : "-"); - } - - }; - - - /** - * A scalar data item fragment provides the capability of maintaining a copy of - * the covered scalar value. 
- */ - template - class ScalarFragment { - - // the stored value - T value; - - // the region covered -- thus, indicating whether the value is present or not - ScalarRegion covered; - - friend class Scalar; - - public: - - using region_type = ScalarRegion; - using shared_data_type = core::no_shared_data; - using facade_type = Scalar; - - ScalarFragment(const core::no_shared_data&, const ScalarRegion& region = ScalarRegion()) - : covered(region) {} - - const ScalarRegion& getCoveredRegion() const { - return covered; - } - - void resize(const ScalarRegion& newSize) { - covered = newSize; - } - - void insert(const ScalarFragment& f, const ScalarRegion& region) { - assert_false(covered.empty()); - if (region.empty()) return; - value = f.value; - } - - void extract(utils::ArchiveWriter& writer, const ScalarRegion& region) const { - // make sure the requested region is covered by this fragment - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "The requested region is not covered by this fragment."; - - // start by adding the extracted region - writer.write(region); - - // if the requested region is empty, we are done - if (region.empty()) return; - - // otherwise we extract the data stored in this fragment - writer.write(value); - } - - void insert(utils::ArchiveReader& reader) { - - // start by reading the encoded region - auto region = reader.read(); - - // make sure the inserted region is covered by this fragment (size is not changing) - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "The region to be imported is not covered by this fragment!"; - - // if the imported data is empty, we are done - if (region.empty()) return; - - // otherwise we load the data from the archive - value = reader.read(); - } - - Scalar mask() { - return Scalar(*this); - } - - }; - - } - - - template - class Scalar : public core::data_item> { - - friend class detail::ScalarFragment; - - std::unique_ptr> owned; - - detail::ScalarFragment* base; - - 
Scalar(detail::ScalarFragment& fragment) - : base(&fragment) {} - - public: - - Scalar() - : owned(std::make_unique>(core::no_shared_data())), base(owned.get()) {} - - T& get() { - return data_item_element_access(*this, detail::ScalarRegion(true), base->value); - } - - const T& get() const { - return data_item_element_access(*this, detail::ScalarRegion(true), base->value); - } - - void set(const T& newValue) { - data_item_element_access(*this, detail::ScalarRegion(true), base->value) = newValue; - } - - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/data/static_grid.h b/vendor/allscale/api/user/data/static_grid.h deleted file mode 100644 index 1af2d9e42..000000000 --- a/vendor/allscale/api/user/data/static_grid.h +++ /dev/null @@ -1,342 +0,0 @@ -#pragma once - -#include "allscale/api/user/data/grid.h" - -namespace allscale { -namespace api { -namespace user { -namespace data { - - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - - using coordinate_type = std::int64_t; - - template - using StaticGridPoint = GridPoint; - - template - using StaticGridBox = GridBox; - - template - using StaticGridRegion = GridRegion; - - template - class StaticGridFragment; - - template - class StaticGrid; - - - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - - template - class StaticGridFragment { - public: - - enum { Dims = sizeof...(Sizes) }; - - using shared_data_type = core::no_shared_data; - using facade_type = StaticGrid; - using region_type = StaticGridRegion; - - private: - - using point = StaticGridPoint; - using box = StaticGridBox; - - region_type size; - - utils::LargeArray data; - - 
public: - - StaticGridFragment(const region_type& size = region_type()) - : StaticGridFragment(core::no_shared_data(), size) {} - - StaticGridFragment(const core::no_shared_data&, const region_type& size = region_type()) : size(size), data(area(totalSize())) { - // allocate covered data space - size.scanByLines([&](const point& a, const point& b) { - data.allocate(flatten(a),flatten(b)); - }); - } - - bool operator==(const StaticGridFragment& other) const { - return data == other.data; - } - - T& operator[](const point& pos) { - return data[flatten(pos)]; - } - - const T& operator[](const point& pos) const { - return data[flatten(pos)]; - } - - StaticGrid mask() { - return StaticGrid(*this); - } - - const region_type& getCoveredRegion() const { - return size; - } - - point totalSize() const { - return point({ Sizes... }); - } - - void resize(const region_type& newSize) { - - // get the difference - region_type plus = region_type::difference(newSize,size); - region_type minus = region_type::difference(size,newSize); - - // update the size - size = newSize; - - // allocated new data - plus.scanByLines([&](const point& a, const point& b){ - data.allocate(flatten(a),flatten(b)); - }); - - // free excessive memory - minus.scanByLines([&](const point& a, const point& b){ - data.free(flatten(a),flatten(b)); - }); - } - - void insert(const StaticGridFragment& other, const region_type& area) { - assert_true(core::isSubRegion(area,other.size)) << "New data " << area << " not covered by source of size " << size << "\n"; - assert_true(core::isSubRegion(area,size)) << "New data " << area << " not covered by target of size " << size << "\n"; - - // copy data line by line using memcpy - area.scanByLines([&](const point& a, const point& b){ - auto start = flatten(a); - auto length = (flatten(b) - start) * sizeof(T); - std::memcpy(&data[start],&other.data[start],length); - }); - } - - void extract(utils::ArchiveWriter& writer, const region_type& region) const { - - // make sure the 
region is covered - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "This fragment does not contain all of the requested data!"; - - // write the requested region to the archive - writer.write(region); - - // add the data - region.scan([&](const point& p){ - writer.write((*this)[p]); - }); - } - - void insert(utils::ArchiveReader& reader) { - - // extract the covered region contained in the archive - auto region = reader.read(); - - // check that it is fitting - assert_pred2(core::isSubRegion, region, getCoveredRegion()) - << "Targeted fragment does not cover data to be inserted!"; - - // insert the data - region.scan([&](const point& p){ - (*this)[p] = reader.read(); - }); - } - - private: - - static std::size_t area(const StaticGridPoint& pos) { - std::size_t res = 1; - for(std::size_t i=0; i& pos) const { - - static const std::array totalSize{ { Sizes ... } }; - - coordinate_type res = 0; - coordinate_type size = 1; - - for(int i=Dims-1; i>=0; i--) { - res += pos[i] * size; - size *= totalSize[i]; - } - - return res; - } - - }; - - template - class StaticGrid : public core::data_item> { - - /** - * A pointer to an underlying fragment owned if used in an unmanaged state. - */ - std::unique_ptr> owned; - - /** - * A reference to the fragment instance operating on, referencing the owned fragment or an externally managed one. - */ - StaticGridFragment* base; - - /** - * Enables fragments to use the private constructor below. - */ - friend class StaticGridFragment; - - /** - * The constructor to be utilized by the fragment to create a facade for an existing fragment. - */ - StaticGrid(StaticGridFragment& base) : base(&base) {} - - public: - - /** - * The number of dimensions. - */ - enum { dimensions = sizeof...(Sizes) }; - - /** - * The type of coordinate utilized by this type. - */ - using coordinate_type = StaticGridPoint; - - /** - * The type of region utilized by this type. 
- */ - using region_type = StaticGridRegion; - - /** - * Creates a new map covering the given region. - */ - StaticGrid() - : owned(std::make_unique>(region_type(0,size()))), base(owned.get()) {} - - /** - * A constructor for static grids accepting a size parameter, to be compatible to the dynamic sized grid. - */ - StaticGrid(const StaticGridPoint& size) - : owned(std::make_unique>(region_type(0,size))), base(owned.get()) { - assert_eq(size,this->size()) << "Initialization of invalid sized static grid."; - } - - /** - * Disable copy construction. - */ - StaticGrid(const StaticGrid&) = delete; - - /** - * Enable move construction. - */ - StaticGrid(StaticGrid&&) = default; - - /** - * Disable copy-assignments. - */ - StaticGrid& operator=(const StaticGrid&) = delete; - - /** - * Enable move assignments. - */ - StaticGrid& operator=(StaticGrid&&) = default; - - /** - * Obtains the full size of this grid. - */ - coordinate_type size() const { - return coordinate_type({ Sizes ... }); - } - - /** - * Compare the full content of the grid. - */ - bool operator==(const StaticGrid& other) const { - return *base == *other.base; - } - - /** - * Provides read/write access to one of the values stored within this grid. - */ - T& operator[](const coordinate_type& index) { - allscale_check_bounds(index, (*this)); - return data_item_element_access(*this, region_type::single(index), (*base)[index]); - } - - /** - * Provides read access to one of the values stored within this grid. - */ - const T& operator[](const coordinate_type& index) const { - allscale_check_bounds(index, (*this)); - return data_item_element_access(*this, region_type::single(index), (*base)[index]); - } - - /** - * A sequential scan over all elements within this grid, providing - * read-only access. 
- */ - template - void forEach(const Op& op) const { - allscale::api::user::algorithm::detail::forEach( - coordinate_type(0), - size(), - [&](const auto& pos){ - op((*this)[pos]); - } - ); - } - - /** - * A sequential scan over all elements within this grid, providing - * read/write access. - */ - template - void forEach(const Op& op) { - allscale::api::user::algorithm::detail::forEach( - coordinate_type(0), - size(), - [&](const auto& pos){ - op((*this)[pos]); - } - ); - } - - /** - * A sequential scan over all elements within this grid, providing - * read-only access. - */ - template - auto pforEach(const Op& op) const { - return algorithm::pfor(coordinate_type(0), size(), [&](const auto& pos) { op((*this)[pos]); }); - } - - /** - * A parallel scan over all elements within this grid, providing - * read/write access. - */ - template - auto pforEach(const Op& op) { - return algorithm::pfor(coordinate_type(0), size(), [&](const auto& pos) { op((*this)[pos]); }); - } - - }; - -} // end namespace data -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/api/user/save_to_binary.h b/vendor/allscale/api/user/save_to_binary.h deleted file mode 100644 index 2f6de85f8..000000000 --- a/vendor/allscale/api/user/save_to_binary.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once - -#include "allscale/api/core/io.h" -#include "allscale/api/user/algorithm/pfor.h" - - -namespace allscale { -namespace api { -namespace user { - -// Save vector of vectors to binary in parallel -template -void saveVecVecToFile(std::vector> vecVec, std::string filename, size_t innerSize) { - core::FileIOManager& manager = core::FileIOManager::getInstance(); - size_t outerSize = vecVec.size(); - - // generate output data - core::Entry binary = manager.createEntry(filename, core::Mode::Binary); - auto fout = manager.openOutputStream(binary); - -// fout.write(innerSize); - - std::vector idxVec; - for(size_t i = 0; i < innerSize; ++i) - idxVec.push_back(i); 
- - algorithm::pfor(idxVec, [&](size_t& i) { - fout.atomic([&](auto& out) { - // write preamble - out.write(i); - - // write data - for(size_t j = 0; j < outerSize; ++j) { - out.write(vecVec[j][i]); - } - }); - }); - - manager.close(fout); - -} - -template -void saveVecVecToFileMM(std::vector> vecVec, std::string filename, unsigned outerSize, unsigned innerSize) { - core::FileIOManager& manager = core::FileIOManager::getInstance(); - - // generate output data - core::Entry binary = manager.createEntry(filename, core::Mode::Binary); - core::MemoryMappedOutput fout = manager.openMemoryMappedOutput(binary, sizeof(T)* outerSize*innerSize); - - std::vector idxVec; - for(size_t i = 0; i < innerSize; ++i) - idxVec.push_back(i); - - auto dataOut = &fout.access();//std::array>(); - algorithm::pfor(idxVec, [&](size_t& i) { - // write data - for(size_t j = 0; j < outerSize; ++j) { - dataOut[i*outerSize + j] = vecVec[j][i]; - } - }); - manager.close(fout); -} - -// Read vector of vectors to binary in parallel -template -std::vector> readVecVecFromFile(std::string filename, size_t outerSize, size_t innerSize) { - std::vector> vecVec; - core::FileIOManager& manager = core::FileIOManager::getInstance(); - - core::Entry binary = manager.createEntry(filename, core::Mode::Binary); - auto fin = manager.openInputStream(binary); - - for(size_t j = 0; j < outerSize; ++j) { - vecVec.push_back(std::vector()); - for(size_t i = 0; i < innerSize; ++i) - vecVec[j].push_back(T()); - } - - for(size_t i = 0; i < innerSize; ++i) { - // read position from file - size_t idx = fin.read(); - - for(size_t j = 0; j < outerSize; ++j) { - // read data - vecVec[j][idx] = (fin.read()); - } - } - - manager.close(fin); - return vecVec; -} - - -// Read vector of vectors to binary in parallel -template -std::vector> readVecVecFromFileMM(std::string filename, unsigned outerSize, unsigned innerSize) { - std::vector> vecVec; - core::FileIOManager& manager = core::FileIOManager::getInstance(); - - core::Entry 
binary = manager.createEntry(filename, core::Mode::Binary); - auto fin = manager.openMemoryMappedInput(binary); - auto dataIn = &fin.access();//>(); - - for(size_t j = 0; j < outerSize; ++j) { - vecVec.push_back(std::vector()); - for(size_t i = 0; i < innerSize; ++i) - vecVec[j].push_back(T()); - } - - for(size_t i = 0; i < innerSize; ++i) { - for(size_t j = 0; j < outerSize; ++j) { - // read data - vecVec[j][i] = dataIn[i*outerSize + j]; - } - } - - manager.close(fin); - return vecVec; -} - -} // end namespace user -} // end namespace api -} // end namespace allscale diff --git a/vendor/allscale/utils/array_utils.h b/vendor/allscale/utils/array_utils.h deleted file mode 100644 index 5de03f7dc..000000000 --- a/vendor/allscale/utils/array_utils.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include - -namespace allscale { -namespace utils { - -namespace { - template - struct array_builder { - template - std::array operator()(Fn&& fn, T&&... vals) const { - return array_builder{}(std::forward(fn), std::forward(vals)..., fn()); - } - }; - - template - struct array_builder { - template - std::array operator()(Fn&&, T&&... vals) const { - return { { std::forward(vals)... } }; - } - }; -} - -/* - * Create an Array of N elements, initialized with the elements returned by fn. Can be used to create an array of elements without default constructor - * - */ -template::type> -std::array build_array(Fn&& fn) { - return array_builder<0, N, U>()(std::forward(fn)); -} - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/assert.h b/vendor/allscale/utils/assert.h deleted file mode 100644 index 8e2ca7cc9..000000000 --- a/vendor/allscale/utils/assert.h +++ /dev/null @@ -1,132 +0,0 @@ -#pragma once - -/** - * This header file defines a set of macros to define more readable and flexible assertions within - * program code. Also, macros supporting the declaration of variables only required for checking - * assertions are supported. 
As all assertions, in case the macro NDEBUG is defined, they will be - * ignored. In those cases, variables declared using the 'assert_decl' macro will not be declared. - */ - -#include - -#define __allscale_xstr_(a) __allscale_str_(a) -#define __allscale_str_(a) #a - -#include "allscale/utils/unused.h" - -#if defined(NDEBUG) - -#define _assert_ignore \ - if(false) std::cerr << "" - -#define assert_decl(_DECL) ((void)0) -#define assert_true(_COND) _assert_ignore -#define assert_eq(_a, _b) _assert_ignore -#define assert_ne(_a, _b) _assert_ignore -#define assert_lt(_a, _b) _assert_ignore -#define assert_le(_a, _b) _assert_ignore -#define assert_gt(_a, _b) _assert_ignore -#define assert_ge(_a, _b) _assert_ignore -#define assert_fail() _assert_ignore -#define assert_pred1(_a, _b) _assert_ignore -#define assert_not_pred1(_a, _b) _assert_ignore -#define assert_pred2(_a, _b, _c) _assert_ignore -#define assert_not_pred2(_a, _b, _c) _assert_ignore - -#else -#include - - -namespace insieme { -namespace utils { - namespace detail { - - struct LazyAssertion { - bool value; - LazyAssertion(bool value) : value(value) {} - ~LazyAssertion() { - if(!value) { - std::cerr << "\n"; - abort(); - } - } - operator bool() const { - return !value; - } - }; - - } // end namespace detail -} // end namespace utils -} // end namespace insieme - -#define assert_decl(_DECL) _DECL - -#define assert_true(_COND) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((bool)(_COND))) \ - std::cerr << "\nAssertion " #_COND " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_eq(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) == (_B))) \ - std::cerr << "\nAssertion " #_A " == " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_ne(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = 
insieme::utils::detail::LazyAssertion((_A) != (_B))) \ - std::cerr << "\nAssertion " #_A " != " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_lt(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) < (_B))) \ - std::cerr << "\nAssertion " #_A " < " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_le(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) <= (_B))) \ - std::cerr << "\nAssertion " #_A " <= " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_gt(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) > (_B))) \ - std::cerr << "\nAssertion " #_A " > " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_ge(_A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((_A) >= (_B))) \ - std::cerr << "\nAssertion " #_A " >= " #_B " of " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n\t" #_A " = " << (_A) << "\n\t" #_B " = " << (_B) << "\n" - -#define assert_fail() \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion(false)) std::cerr << "\nAssertion failed in " __FILE__ ":" __allscale_xstr_(__LINE__) " - " - -#define assert_pred1(_P, _A) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((bool)((_P)(_A)))) \ - std::cerr << "\nAssertion " #_P "(" #_A ") with " #_A " = " << (_A) << " in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_not_pred1(_P, _A) \ - if(__allscale_unused auto __allscale_temp_object_ = 
insieme::utils::detail::LazyAssertion(!(bool)((_P)(_A)))) \ - std::cerr << "\nAssertion !" #_P "(" #_A ") with " #_A " = " << (_A) << " in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_pred2(_P, _A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion((bool)((_P)(_A, _B)))) \ - std::cerr << "\nAssertion " #_P "(" #_A ", " #_B ") with\n " #_A " = " << (_A) << "\n " #_B " = " << (_B) \ - << "\n in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#define assert_not_pred2(_P, _A, _B) \ - if(__allscale_unused auto __allscale_temp_object_ = insieme::utils::detail::LazyAssertion(!(bool)((_P)(_A, _B)))) \ - std::cerr << "\nAssertion !" #_P "(" #_A ", " #_B ") with\n " #_A " = " << (_A) << "\n " #_B " = " << (_B) \ - << "\n in " __FILE__ ":" __allscale_xstr_(__LINE__) " failed!\n" - -#endif - -// ------ derived definitions ------ - -#define assert_false(_COND) assert_true(!(_COND)) -#define assert_not_implemented() assert_fail() << "Not implemented functionality in " __FILE__ ":" __allscale_xstr_(__LINE__) "\n" - -// --------- bounds checks --------- - -#if defined(ALLSCALE_CHECK_BOUNDS) - -#define allscale_check_bounds(_INDEX, _CONTAINER) \ - assert_true((_INDEX) >= 0 && (_INDEX) < (_CONTAINER).size()) << "Index " << (_INDEX) << " out of bounds " << (_CONTAINER).size(); - -#else - -#define allscale_check_bounds(_INDEX, _CONTAINER) \ - if(false) std::cerr << "" - -#endif \ No newline at end of file diff --git a/vendor/allscale/utils/bag.h b/vendor/allscale/utils/bag.h deleted file mode 100644 index ed1a61f00..000000000 --- a/vendor/allscale/utils/bag.h +++ /dev/null @@ -1,117 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace allscale { -namespace utils { - - /** - * A data structure for maintaining a collection of - * objects with duplicates. 
- */ - template - class Bag { - - // the element type maintained in this bag - using element_type = T; - - // internally, the data is maintained in a simple list - std::vector data; - - public: - - /** - * Tests whether this bag is empty or not. - */ - bool empty() const { - return data.empty(); - } - - /** - * Determines the number of elements in this bag. - */ - std::size_t size() const { - return data.size(); - } - - /** - * Inserts a new element in this bag. - */ - void insert(const T& element) { - data.push_back(element); - } - - /** - * Removes an element from this bag. - */ - void remove(const T& element) { - auto pos = std::find(data.begin(),data.end(),element); - if (pos == data.end()) return; - data.erase(pos); - } - - /** - * Tests whether the given element is contained within this bag. - */ - bool contains(const T& element) { - auto pos = std::find(data.begin(),data.end(),element); - return pos != data.end(); - } - - // add support for scans - - /** - * Obtains an iterator pointing to the start of the range of - * elements contained in this bag. - */ - auto begin() const { - return data.begin(); - } - - /** - * Obtains an iterator pointing to the end of the range of - * elements contained in this bag. - */ - auto end() const { - return data.end(); - } - - /** - * Runs a combined update and filter operation on the elements - * in this bag. The elements are passed by reference to the given - * body -- which may return false if elements shell be removed, tue - * otherwise. - */ - template - void updateFilter(const Body& body) { - // remove all elements where the predicate is violated - auto newEnd = std::remove_if(data.begin(), data.end(), [&](T& i) { return !body(i); }); - data.erase(newEnd,data.end()); - } - - /** - * Removes all elements from this bag which do not satisfy the - * given predicates. 
- */ - template - void filter(const Predicate& pred) { - updateFilter([&](const T& i) { - return pred(i); - }); - } - - /** - * Adds printer support to this bag. - */ - friend std::ostream& operator<<(std::ostream& out, const Bag& bag) { - return out << "{" << utils::join(",",bag.data) << "}"; - } - - }; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/bitmanipulation.h b/vendor/allscale/utils/bitmanipulation.h deleted file mode 100644 index 1391ddbcc..000000000 --- a/vendor/allscale/utils/bitmanipulation.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#ifdef _MSC_VER - #include -#endif - -namespace allscale { -namespace utils { - - /** - * A wrapper function for counting leading zeros - */ - inline int countLeadingZeros(unsigned value) { - #ifdef _MSC_VER - unsigned long retVal = 0; - if(_BitScanReverse(&retVal, value)) - return 31-retVal; - // all zeros is undefined behavior, we simply return 32 - return 32; - #else - return __builtin_clz(value); - #endif - } - - /** - * A wrapper function for counting trailing zeros - */ - inline int countTrailingZeros(unsigned value) { - #ifdef _MSC_VER - unsigned long retVal = 0; - if(_BitScanForward(&retVal, value)) - return retVal; - // all zeros is undefined behavior, we simply return 32 - return 32; - #else - return __builtin_ctz(value); - #endif - } - - /** - * A wrapper function for counting 1-bits - */ - inline int countOnes(unsigned value) { - #ifdef _MSC_VER - return __popcnt(value); - #else - return __builtin_popcount(value); - #endif - } - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/concepts.h b/vendor/allscale/utils/concepts.h deleted file mode 100644 index fc5d6e72d..000000000 --- a/vendor/allscale/utils/concepts.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include - -namespace allscale { -namespace utils { - - template - struct is_equality_comparable : public std::false_type {}; - - template - struct 
is_equality_comparable() == std::declval()),bool>::value && - std::is_convertible() != std::declval()),bool>::value, - void>::type> : public std::true_type {}; - - - template - struct is_value : public std::false_type {}; - - template - struct is_value::value && - - // regions need to be default-constructible - std::is_copy_constructible::value && - - // regions need to be default-constructible - std::is_copy_assignable::value && - - // regions need to be destructible - std::is_destructible::value && - - // regions need to be equality comparable - utils::is_equality_comparable::value, - - void>::type> : public std::true_type {}; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/functional_utils.h b/vendor/allscale/utils/functional_utils.h deleted file mode 100644 index fcc17a271..000000000 --- a/vendor/allscale/utils/functional_utils.h +++ /dev/null @@ -1,143 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/type_list.h" - -namespace allscale { -namespace utils { - - - // -------------------- Function Traits for Lambdas ---------------------------- - - namespace detail { - - template struct lambda_traits_helper { }; - - // get rid of const modifier - template - struct lambda_traits_helper : public lambda_traits_helper {}; - - // get rid of pointers - template - struct lambda_traits_helper : public lambda_traits_helper {}; - - // handle class of member function pointers - template - struct lambda_traits_helper : public lambda_traits_helper { - typedef C class_type; - }; - - // get rid of const modifier - template - struct lambda_traits_helper : public lambda_traits_helper {}; - - template - struct lambda_traits_helper - { - enum { arity = 0 }; - typedef R result_type; - typedef type_list<> argument_types; - }; - - template - struct lambda_traits_helper - { - enum { arity = 1 }; - typedef R result_type; - typedef T1 arg1_type; - typedef T1 argument_type; - typedef type_list argument_types; - }; - - 
template - struct lambda_traits_helper - { - enum { arity = 2 }; - typedef R result_type; - typedef T1 arg1_type; - typedef T2 arg2_type; - typedef T1 first_argument_type; - typedef T2 second_argument_type; - typedef type_list argument_types; - }; - - template - struct lambda_traits_helper { - enum { arity = 3 + sizeof...(A) }; - typedef R result_type; - typedef T1 arg1_type; - typedef T2 arg2_type; - typedef T3 arg3_type; - typedef type_list argument_types; - }; - - - template - struct call_operator_type { - using type = decltype(Lambda::operator()); - }; - - template - decltype(&Lambda::operator()) getCallOperator() { - return &Lambda::operator(); - } - - /* - psalz: MSVC2015 complains about multiple definitions here. - grid.h doesn't seem to need it => commented out. - template - decltype(&Lambda::template operator()) getCallOperator() { - return &Lambda::template operator(); - } - - template - decltype(&Lambda::template operator()) getCallOperator() { - return &Lambda::template operator(); - } - - template - decltype(&Lambda::template operator()) getCallOperator() { - return &Lambda::template operator(); - } - */ - - } // end namespace detail - - - template - struct lambda_traits : public detail::lambda_traits_helper())> { }; - - template - struct lambda_traits : public detail::lambda_traits_helper { }; - - template - struct lambda_traits : public lambda_traits { }; - - template - struct lambda_traits : public lambda_traits { }; - - template - struct lambda_traits : public detail::lambda_traits_helper { }; - - template - struct lambda_traits : public lambda_traits { }; - - - - template - struct is_std_function : public std::false_type {}; - - template - struct is_std_function> : public std::true_type {}; - - template - struct is_std_function : public is_std_function {}; - - template - struct is_std_function : public is_std_function {}; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/io_utils.h 
b/vendor/allscale/utils/io_utils.h deleted file mode 100644 index 77f162bbb..000000000 --- a/vendor/allscale/utils/io_utils.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace allscale { -namespace utils { - - // -- some convenience utilities for stream based IO operations -- - - template - void write(std::ostream& out, T value) { - out.write((char*)&value, sizeof(T)); - } - - template - void write(std::ostream& out, const Iter& a, const Iter& b) { - for(auto it = a; it != b; ++it) { - out.write((char*)&(*it), sizeof(typename std::remove_reference::type)); - } - } - - template - T read(std::istream& in) { - T value = T(); - in.read((char*)&value, sizeof(T)); - return value; - } - - template - void read(std::istream& in, const Iter& a, const Iter& b) { - for(auto it = a; it != b; ++it) { - *it = read::type>(in); - } - } - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/large_array.h b/vendor/allscale/utils/large_array.h deleted file mode 100644 index 257a19573..000000000 --- a/vendor/allscale/utils/large_array.h +++ /dev/null @@ -1,609 +0,0 @@ -#pragma once - -#ifndef _MSC_VER - #include - #include -#else - #include - #include - -#endif - -#include - -#include -#include - -#include "allscale/utils/assert.h" - -#include "allscale/utils/printer/vectors.h" - -namespace allscale { -namespace utils { - - - namespace detail { - - /** - * Intervals are utilized by the LargeArray class to manage active intervals -- those intervals - * for which the stored values need to be preserved. - */ - class Intervals { - - /** - * A list of start/end values of the covered intervals. - * For instance, the values [10,15,18,35] correspond to the - * intervals [10,..,15) and [18,..,35). The intervals are sorted. - * The lower boundary is included, the upper boundary not. 
- */ - std::vector data; - - public: - - /** - * A factory function creating a list of intervals consisting of a single, - * closed range [begin,end). - */ - static Intervals fromRange(std::size_t begin, std::size_t end) { - Intervals res; - res.add(begin,end); - return res; - } - - /** - * Compares this and the given intervals for equality. - */ - bool operator==(const Intervals& other) const { - return data == other.data; - } - - /** - * Compares this and the given intervals for inequality. - */ - bool operator!=(const Intervals& other) const { - return data != other.data; - } - - /** - * Checks whether this is interval is empty. - */ - bool empty() const { - return data.empty(); - } - - /** - * Adds a new interval to the covered intervals. - * @param from the start (inclusive) of the interval to be added - * @param to the end (exclusive) of the interval to be added - */ - void add(std::size_t from, std::size_t to) { - - // skip empty ranges - if (from >= to) return; - - // insert first element - if (data.empty()) { - data.push_back(from); - data.push_back(to); - } - - // find positions for from and to - auto it_begin = data.begin(); - auto it_end = data.end(); - - auto it_from = std::upper_bound(it_begin, it_end, from); - auto it_to = std::upper_bound(it_begin, it_end, to-1); - - std::size_t idx_from = std::distance(it_begin,it_from); - std::size_t idx_to = std::distance(it_begin,it_to); - - // whether insertion is at a common place - if (it_from == it_to) { - - // if it is between ranges ... 
- if (idx_to % 2 == 0) { - - // check whether it is a gap closing a range - if (idx_to > 1 && idx_to < data.size() && data[idx_to-1] == from && data[idx_to] == to) { - data.erase(it_from-1,it_to+1); - return; - } - - // check whether it is connecting to the one on the left - if (idx_to > 1 && data[idx_to-1] == from) { - data[idx_to-1] = to; - return; - } - - // check whether it is connecting to the one on the right - if (idx_to < data.size() && data[idx_to] == to) { - data[idx_to] = from; - return; - } - } - - // check whether it is the end - if (it_from == it_end) { - data.push_back(from); - data.push_back(to); - return; - } - - // check whether it is within an interval - if ((idx_from % 2) == 1) { - return; // nothing to add - } - - // insert new pair at insertion position - data.insert(it_from,2,from); - data[idx_from+1] = to; - - return; - } - - // if from references an existing start value => correct it - if (idx_from % 2 == 0) { - data[idx_from] = from; - ++it_from; - } else { - // all fine - } - - // correct end of last closed interval - if (idx_to % 2 == 0) { - data[idx_to-1] = to; - it_to -= 1; - } else { - // nothing to do here - } - - if (it_from < it_to) data.erase(it_from,it_to); - - } - - /** - * Removes the given interval from the covered range. - * @param from the start (inclusive) of the interval to be removed - * @param to the end (exclusive) of the interval to be removed - */ - void remove(std::size_t from, std::size_t to) { - - // quick exits - if (from >= to) return; - if (data.empty()) return; - - // find positions for from and to - auto it_begin = data.begin(); - auto it_end = data.end(); - - auto it_from = std::upper_bound(it_begin, it_end, from); - auto it_to = std::upper_bound(it_begin, it_end, to-1); - - std::size_t idx_from = std::distance(it_begin,it_from); - std::size_t idx_to = std::distance(it_begin,it_to); - - // in case they are both at the same spot - if (idx_from == idx_to) { - - // if it is between two intervals .. 
- if (idx_from % 2 == 0) return; // .. there is nothing to delete - - // it is within a single interval - assert_eq(1, idx_from % 2); - - // check whether full interval is covered - if (data[idx_from-1] == from && data[idx_to] == to) { - data.erase(it_from-1,it_to+1); - return; - } - - // check if lower boundary matches - if (data[idx_from-1] == from) { - data[idx_from-1] = to; - return; - } - - // check if lower boundary matches - if (data[idx_to] == to) { - data[idx_to] = from; - return; - } - - data.insert(it_from,2,from); - data[idx_from+1] = to; - return; - - } - - if (idx_from % 2 == 1) { - data[idx_from] = from; - it_from++; - } - - if (idx_to % 2 == 1) { - data[idx_to-1] = to; - it_to--; - } - - // delete nodes in-between - data.erase(it_from,it_to); - return; - - } - - /** - * Removes the given intervals from the covered range. - * @param other the intervals to be removed - */ - void remove(const Intervals& other) { - // iteratively remove the elements of the given interval - for(std::size_t i =0; i::min()); - data.insert(data.end(), std::numeric_limits::max()); - - // remove first pair if it is empty - if (data[0] == data[1]) { - for(std::size_t i = 0; i= to) return true; - auto begin = data.begin(); - auto end = data.end(); - auto a = std::upper_bound(begin, end, from); - auto b = std::upper_bound(begin, end, to-1); - return a == b && a != end && ((std::distance(begin,a) % 2) == 1); - } - - /** - * Tests whether any the points within the range [from,...,to) are covered by this intervals. - */ - bool coversAny(std::size_t from, std::size_t to) const { - if (from >= to) return false; - auto begin = data.begin(); - auto end = data.end(); - auto a = std::upper_bound(begin, end, from); - auto b = std::upper_bound(begin, end, to-1); - return a < b || (a == b && a != end && ((std::distance(begin,a) % 2) == 1)); - } - - /** - * Swaps the content of this interval with the given one. 
- */ - void swap(Intervals& other) { - data.swap(other.data); - } - - /** - * Invokes the given function for each index in the covered intervals. - */ - template - void forEach(const Fun& fun) const { - // iterate through the individual intervals - for(std::size_t i =0; i - class LargeArray { - - /** - * A pointer to the first element of the array. - */ - T* data; - - /** - * The size of this large array. - */ - std::size_t size; - - /** - * The list of active ranges in this large array (for which the memory is kept alive). - */ - detail::Intervals active_ranges; - - public: - - /** - * Creates a new large array of the given size. - */ - LargeArray(std::size_t size) : data(nullptr), size(size) { - - // check whether there is something to allocate - if (size == 0) return; - - // allocate the address space - #ifdef _MSC_VER - data = (T*)malloc(sizeof(T)*size); - assert_true(data != nullptr) << "Failed to allocate memory of size" << sizeof(T)*size; - #else - data = (T*)mmap(nullptr,sizeof(T)*size, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, - -1,0 - ); - #endif - assert_ne((void*)-1,(void*)data); - } - - /** - * Explicitly deleted copy constructor. - */ - LargeArray(const LargeArray&) = delete; - - /** - * A move constructor for large arrays. - */ - LargeArray(LargeArray&& other) - : data(other.data), size(other.size), active_ranges(std::move(other.active_ranges)) { - assert_true(other.active_ranges.empty()); - other.data = nullptr; - } - - /** - * Destroys this array. - */ - ~LargeArray() { - - // if there is no data, nothing to do - if (data == nullptr) return; - - // call the destructor for the remaining objects (if required) - if (!std::is_trivially_destructible::value) { - active_ranges.forEach([this](std::size_t i){ - data[i].~T(); - }); - } - - // free the data - #ifdef _MSC_VER - ::free(data); - #else - munmap(data,sizeof(T)*size); - #endif - } - - /** - * Explicitly deleted copy-assignment operator. 
- */ - LargeArray& operator=(const LargeArray&) = delete; - - /** - * Implementation of move assignment operator. - */ - LargeArray& operator=(LargeArray&& other) { - assert_ne(data,other.data); - if (data) { - #ifdef _MSC_VER - ::free(data); - #else - munmap(data, sizeof(T)*size); - #endif - } - std::swap(data,other.data); - size = other.size; - active_ranges.swap(other.active_ranges); - return *this; - } - - bool operator==(const LargeArray& other) const { - // quick check - if (this == &other) return true; - - // check the same size - if (size != other.size) return false; - - // make sure both have allocated all the space - assert_eq(active_ranges, other.active_ranges); - - // compare active ranges - bool res = true; - active_ranges.forEach([&](std::size_t pos){ - res = res && (data[pos] == other.data[pos]); - }); - return res; - } - - /** - * Allocates the given range within this large array. - * After this call, the corresponding sub-range can be accessed. - */ - void allocate(std::size_t start, std::size_t end) { - // check for emptiness - if (start >= end) return; - assert_le(end, size) << "Invalid range " << start << " - " << end << " for array of size " << size; - - - // invoke the constructor for the released objects (if required) - if (!std::is_trivially_constructible::value) { - - // compute the ranges of new elements - auto newElements = detail::Intervals::fromRange(start,end); - newElements.remove(active_ranges); - - - // initialize the newly allocated elements - newElements.forEach([this](std::size_t i){ - new (&data[i]) T(); - }); - } - - // add to active range - active_ranges.add(start,end); - } - - /** - * Frees the given range, thereby deleting the content and freeing the - * associated memory pages. 
- */ - void free(std::size_t start, std::size_t end) { - - // check for emptiness - if (start >= end) return; - assert_le(end, size) << "Invalid range " << start << " - " << end << " for array of size " << size; - - // invoke the destructor for the released objects (if required) - if (!std::is_trivially_destructible::value) { - - // compute the elements to be removed - auto removedElements = detail::Intervals::fromRange(start,end); - removedElements.retain(active_ranges); - - // delete elements to be removed - removedElements.forEach([this](std::size_t i){ - data[i].~T(); // explicit destructor call - }); - - } - - // remove range from active ranges - active_ranges.remove(start,end); - - #ifdef _MSC_VER - // do nothing - #else - // get address of lower boundary - uintptr_t ptr_start = (uintptr_t)(data + start); - uintptr_t ptr_end = (uintptr_t)(data + end); - - auto page_size = getPageSize(); - uintptr_t pg_start = ptr_start - (ptr_start % page_size); - uintptr_t pg_end = ptr_end - (ptr_end % page_size) + page_size; - - std::size_t idx_start = (pg_start - (uintptr_t)(data)) / sizeof(T); - std::size_t idx_end = (pg_end - (uintptr_t)(data)) / sizeof(T); - - assert_le(idx_start,start); - assert_le(end,idx_end); - - if (active_ranges.coversAny(idx_start,start)) pg_start += page_size; - if (active_ranges.coversAny(end,idx_end)) pg_end -= page_size; - pg_end = std::min(pg_end,ptr_end); - - if (pg_start >= pg_end) return; - - - void* section_start = (void*)pg_start; - std::size_t length = pg_end - pg_start; - munmap(section_start, length); - auto res = mmap(section_start, length, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE | MAP_FIXED, - -1,0 - ); - if ((void*)-1 == (void*)res) { - assert_ne((void*)-1,(void*)res); - } - #endif - } - - /** - * Provides mutable access to the element at the given position. - */ - T& operator[](std::size_t pos) { - return data[pos]; - } - - /** - * Provides read-only access to the element at the given position. 
- */ - const T& operator[](std::size_t pos) const { - return data[pos]; - } - - private: - - /** - * Determines the memory page size of the system. - */ - static long getPageSize() { - #ifndef _MSC_VER - static const long PAGE_SIZE = sysconf(_SC_PAGESIZE); - #else - static const long PAGE_SIZE = 0; - #endif - return PAGE_SIZE; - } - - }; - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/printer/arrays.h b/vendor/allscale/utils/printer/arrays.h deleted file mode 100644 index 9a1488d5d..000000000 --- a/vendor/allscale/utils/printer/arrays.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace std { - - template - ostream& operator<<(ostream& out, const array& data) { - return out << "[" << allscale::utils::join(",", data) << "]"; - } - -} diff --git a/vendor/allscale/utils/printer/join.h b/vendor/allscale/utils/printer/join.h deleted file mode 100644 index 15373b351..000000000 --- a/vendor/allscale/utils/printer/join.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once - -namespace allscale { -namespace utils { - - namespace detail { - - template - struct DefaultElementPrinter { - void operator()(std::ostream& out, const T& value) const { - out << value; - } - }; - - template - class joinable { - - Iter begin; - Iter end; - Sep sep; - ElementPrinter printer; - - public: - - joinable(const Iter& begin, const Iter& end, const Sep& sep, const ElementPrinter& printer = ElementPrinter()) - : begin(begin), end(end), sep(sep), printer(printer) {} - - friend - std::ostream& operator<<(std::ostream& out, const joinable& j) { - if (j.begin == j.end) return out; - Iter cur = j.begin; - j.printer(out, *cur); - cur++; - while(cur != j.end) { - out << j.sep; - j.printer(out, *cur); - cur++; - } - return out; - } - - }; - - } - - - template::value_type> - detail::joinable> join(const char* sep, const Iter& begin, const Iter& end) { - return detail::joinable>(begin,end,sep); 
- } - - template::value_type> - detail::joinable> join(const std::string& sep, const Iter& begin, const Iter& end) { - return detail::joinable>(begin,end,sep); - } - - template - auto join(const Sep& sep, const Container& c) -> decltype(join(sep, c.cbegin(), c.cend())) { - return join(sep, c.cbegin(), c.cend()); - } - - template - detail::joinable join(const char* sep, const Iter& begin, const Iter& end, const Printer& printer) { - return detail::joinable(begin,end,sep,printer); - } - - template - detail::joinable join(const std::string& sep, const Iter& begin, const Iter& end, const Printer& printer) { - return detail::joinable(begin,end,sep,printer); - } - - template - auto join(const Sep& sep, const Container& c, const Printer& p) -> decltype(join(sep, c.cbegin(), c.cend(),p)) { - return join(sep, c.cbegin(), c.cend(),p); - } - - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/printer/pairs.h b/vendor/allscale/utils/printer/pairs.h deleted file mode 100644 index 85ca619ea..000000000 --- a/vendor/allscale/utils/printer/pairs.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include -#include - -namespace std { - - template - ostream& operator<<(ostream& out, const pair& data) { - return out << "[" << data.first << "," << data.second << "]"; - } - -} diff --git a/vendor/allscale/utils/printer/set.h b/vendor/allscale/utils/printer/set.h deleted file mode 100644 index 404597103..000000000 --- a/vendor/allscale/utils/printer/set.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace std { - - template - ostream& operator<<(ostream& out, const set& data) { - return out << "{" << allscale::utils::join(",", data) << "}"; - } - -} diff --git a/vendor/allscale/utils/printer/vectors.h b/vendor/allscale/utils/printer/vectors.h deleted file mode 100644 index e197f986a..000000000 --- a/vendor/allscale/utils/printer/vectors.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma 
once - -#include -#include - -#include "allscale/utils/printer/join.h" - -namespace std { - - template - ostream& operator<<(ostream& out, const vector& data) { - return out << "[" << allscale::utils::join(",", data) << "]"; - } - -} diff --git a/vendor/allscale/utils/range.h b/vendor/allscale/utils/range.h deleted file mode 100644 index e0b7ed549..000000000 --- a/vendor/allscale/utils/range.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace allscale { -namespace utils { - - namespace detail { - - template - struct get_size { - std::size_t operator()(const Iter& a, const Iter& b) { - return std::distance(a,b); - } - }; - - template - struct get_size { - std::size_t operator()(const T* a, const T* b) { - return b - a; - } - }; - } - - - template - struct range { - Iter _begin; - Iter _end; - - Iter begin() const { - return _begin; - } - - Iter end() const { - return _end; - } - - bool empty() const { - return _begin == _end; - } - - std::size_t size() const { - return detail::get_size()(_begin,_end); - } - - const typename std::iterator_traits::value_type& front() const { - return *_begin; - } - - const typename std::iterator_traits::value_type& back() const { - return *(_end - 1); - } - }; - - template - bool operator==(const std::vector& data, const range& range) { - if (data.size() != range.size()) return false; - return std::equal(data.begin(), data.end(), range.begin()); - } - - template - bool operator==(const range& range, const std::vector& data) { - return data == range; - } - - template - bool operator!=(const std::vector& data, const range& range) { - return !(data == range); - } - - template - bool operator!=(const range& range, const std::vector& data) { - return data != range; - } - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/raw_buffer.h b/vendor/allscale/utils/raw_buffer.h deleted file mode 100644 index a38e54614..000000000 --- a/vendor/allscale/utils/raw_buffer.h 
+++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -namespace allscale { -namespace utils { - - /** - * A utility for interpreting raw buffers. - */ - class RawBuffer { - - char* cur; - - public: - - /** - * Creates a buffer based on the given memory location. - */ - template - RawBuffer(T* base) : cur(reinterpret_cast(base)) {} - - /** - * Consumes an element of type T from the underlying buffer. - */ - template - T& consume() { - return consumeArray(1)[0]; - } - - /** - * Consumes an array of elements of type T form the underlying buffer. - */ - template - T* consumeArray(std::size_t numElements) { - - // check that the given type allows this kind of operations - static_assert( - std::is_trivially_copy_assignable::value || - std::is_trivially_move_assignable::value, - "Invalid reinterpretation of raw data!" - ); - - // 'parse' initial elements - auto res = reinterpret_cast(cur); - // progress position - cur += sizeof(T) * numElements; - // return result - return res; - } - - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/serializer.h b/vendor/allscale/utils/serializer.h deleted file mode 100644 index 92791b8bf..000000000 --- a/vendor/allscale/utils/serializer.h +++ /dev/null @@ -1,500 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "allscale/utils/assert.h" - -#if defined(ALLSCALE_WITH_HPX) -#include -#include -#include -#include -#endif - -namespace allscale { -namespace utils { - - // --------------------------------------------------------------------------------- - // Declarations - // --------------------------------------------------------------------------------- - - /** - * An archive contains the serialized version of some data structure (fragment). - * It enables the exchange of data between e.g. address spaces. - */ - class Archive; - - /** - * An archive writer is a builder for archives. It is utilized for serializing objects. 
- */ - class ArchiveWriter; - - /** - * An archive reader is a utility to reconstruct data structures from archives. - */ - class ArchiveReader; - - /** - * A serializer describes the way types are converted to and restored from archives. - */ - template - struct serializer; - - /** - * This type trait can be utilized to test whether a given type is serializable, - * thus packable into an archive, or not. - */ - template - struct is_serializable; - - /** - * A facade function for packing an object into an archive. - */ - template - typename std::enable_if::value,Archive>::type - serialize(const T&); - - /** - * A facade function for unpacking an object from an archive. - */ - template - typename std::enable_if::value,T>::type - deserialize(Archive&); - - - // --------------------------------------------------------------------------------- - // Definitions - // --------------------------------------------------------------------------------- - - - namespace detail { - - /** - * A simple, initial, functionally complete implementation of a data buffer - * for storing data within an archive. - */ - class DataBuffer { - - // check some underlying assumption - static_assert(sizeof(char)==1, "If a char is more than a byte, this implementation needs to be checked."); - - // the actual data store (std::vector handles the dynamic growing for us) - std::vector data; - - public: - - DataBuffer() {} - - DataBuffer(const DataBuffer&) = default; - DataBuffer(DataBuffer&&) = default; - - DataBuffer(const std::vector& data) : data(data) {} - DataBuffer(std::vector&& data) : data(std::move(data)) {} - - DataBuffer& operator=(const DataBuffer&) = default; - DataBuffer& operator=(DataBuffer&&) = default; - - /** - * The main function for appending data to this buffer. 
- */ - void append(const char* start, std::size_t count) { - // create space - auto pos = data.size(); - data.resize(pos + count / sizeof(char)); - - // append at end - std::memcpy(&data[pos],start,count); - - } - - /** - * Obtains the number of bytes this buffer is occupying. - */ - std::size_t size() const { - return data.size() * sizeof(char); - } - - /** - * Obtains a pointer to the begin of the internally maintained buffer (inclusive). - */ - const char* begin() const { - return &data.front(); - } - - /** - * Obtains a pointer to the end of the internally maintained buffer (exclusive). - */ - const char* end() const { - return &data.back() + 1; - } - - /** - * Support implicit conversion of this buffer to a vector of characters. - */ - operator const std::vector&() const { - return data; - } - - /** - * Also enable the implicit hand-off of the ownership of the underlying char store. - */ - operator std::vector() && { - return std::move(data); - } - - - }; - - } // end namespace detail - - - class Archive { - - friend class ArchiveWriter; - friend class ArchiveReader; - - // the data represented by this archive - detail::DataBuffer data; - - Archive(detail::DataBuffer&& data) - : data(std::move(data)) {} - - public: - - - - Archive(const Archive&) = default; - Archive(Archive&&) = default; - - Archive(const std::vector& buffer) : data(buffer) {} - Archive(std::vector&& buffer) : data(std::move(buffer)) {} - - Archive& operator=(const Archive&) = default; - Archive& operator=(Archive&&) = default; - - /** - * Support implicit conversion of this archive to a vector of characters. - */ - operator const std::vector&() const { - return data; - } - - /** - * Also enable the implicit hand-off of the ownership of the underlying buffer. - */ - operator std::vector() && { - return std::move(data); - } - - /** - * Provide explicit access to the underlying char buffer. 
- */ - const std::vector& getBuffer() const { - return data; - } - }; - -#if !defined(ALLSCALE_WITH_HPX) - class ArchiveWriter { - - // the buffer targeted by this archive writer - detail::DataBuffer data; - - public: - - ArchiveWriter() {} - - ArchiveWriter(const ArchiveWriter&) = delete; - ArchiveWriter(ArchiveWriter&&) = default; - - ArchiveWriter& operator=(const ArchiveWriter&) = delete; - ArchiveWriter& operator=(ArchiveWriter&&) = default; - - /** - * Appends a given number of bytes to the end of the underlying data buffer. - */ - void write(const char* src, std::size_t count) { - data.append(src,count); - } - - /** - * A utility function wrapping the invocation of the serialization mechanism. - */ - template - std::enable_if_t::value,void> - write(const T& value) { - // use serializer to store object of this type - serializer::store(*this,value); - } - - /** - * Obtains the archive produces by this writer. After the call, - * this writer must not be used any more. - */ - Archive toArchive() && { - return std::move(data); - } - - }; -#else - class ArchiveWriter { - hpx::serialization::output_archive &ar_; - - public: - ArchiveWriter(hpx::serialization::output_archive &ar) : ar_(ar) {} - - /** - * Appends a given number of bytes to the end of the underlying data buffer. - */ - void write(const char* src, std::size_t count) { - ar_ & hpx::serialization::make_array(src, count); - } - - /** - * A utility function wrapping the invocation of the serialization mechanism. 
- */ - template - std::enable_if_t::value,void> - write(const T& value) { -// // use serializer to store object of this type - serializer::store(*this,value); - } - - template - std::enable_if_t::value,void> - write(const T& value) { - ar_ & value; - } - }; -#endif - -#if !defined(ALLSCALE_WITH_HPX) - class ArchiveReader { - - // the current point of the reader - const char* cur; - - // the end of the reader (only checked for debugging) - const char* end; - - public: - - /** - * A archive reader can only be obtained from an existing archive. - */ - ArchiveReader(const Archive& archive) - : cur(archive.data.begin()), end(archive.data.end()) {} - - ArchiveReader(const ArchiveReader&) = delete; - ArchiveReader(ArchiveReader&&) = default; - - ArchiveReader& operator=(const ArchiveReader&) = delete; - ArchiveReader& operator=(ArchiveReader&&) = default; - - /** - * Reads a number of bytes from the underlying buffer. - */ - void read(char* dst, std::size_t count) { - // copy the data - std::memcpy(dst,cur,count); - // move pointer forward - cur += count; - - // make sure that we did not cross the end of the buffer - assert_le(cur,end); - } - - /** - * A utility function wrapping up the de-serialization of an object - * of type T from the underlying buffer. - */ - template - std::enable_if_t::value,T> - read() { - // use serializer to restore object of this type - return serializer::load(*this); - } - - }; -#else - class ArchiveReader { - hpx::serialization::input_archive &ar_; - - public: - ArchiveReader(hpx::serialization::input_archive &ar) : ar_(ar) {} - - /** - * Reads a number of bytes from the underlying buffer. - */ - void read(char* dst, std::size_t count) { - ar_ & hpx::serialization::make_array(dst, count); - } - - /** - * A utility function wrapping up the de-serialization of an object - * of type T from the underlying buffer. 
- */ - template - std::enable_if_t::value,T> - read() { - // use serializer to restore object of this type - return serializer::load(*this); - } - - template - std::enable_if_t::value,T> - read() { - // use serializer to restore object of this type - T t; - ar_ & t; - return t; - } - }; -#endif - - - /** - * Adds support for the serialization to every type T supporting - * - * - a static member function T load(ArchiveReader&) - * - a member function void store(ArchiveWriter&) - * - * Thus, serialization / deserialization can be integrated through member functions. - */ - template - struct serializer())),T>::value && - // ... and a store member function - std::is_same().store(std::declval())),void>::value, - void>::type> { - - static T load(ArchiveReader& a) { - return T::load(a); - } - static void store(ArchiveWriter& a, const T& value) { - value.store(a); - } - }; - - - /** - * Enables the skipping of const qualifiers for types. - * Also const values can be serialized and deserialized if requested. - */ - template - struct serializer::value, - void>::type> : public serializer {}; - - - - // -- primitive type serialization -- - - namespace detail { - - /** - * A helper functor for serializing primitive types. 
- */ - template - struct primitive_serializer { - static T load(ArchiveReader& reader) { - T res = 0; - reader.read(reinterpret_cast(&res),sizeof(T)); - return res; - } - static void store(ArchiveWriter& writer, const T& value) { - writer.write(reinterpret_cast(&value),sizeof(T)); - } - }; - - } // end namespace detail - - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - template<> struct serializer : public detail::primitive_serializer {}; - - - template - struct is_serializable : public std::false_type {}; - - template - struct is_serializable::load)), T(*)(Archive&)>::value && - std::is_same::store)), void(*)(Archive&, const T&)>::value, - void>::type> : public std::true_type {}; - - - - // -- facade functions -- -#if !defined(ALLSCALE_WITH_HPX) - template - typename 
std::enable_if::value,Archive>::type - serialize(const T& value) { - ArchiveWriter writer; - writer.write(value); - return std::move(writer).toArchive(); - } - - template - typename std::enable_if::value,T>::type - deserialize(Archive& a) { - return ArchiveReader(a).read(); - } -#endif - -} // end namespace utils -} // end namespace allscale - -#if defined(ALLSCALE_WITH_HPX) -namespace hpx { -namespace serialization { - template - typename std::enable_if< - ::allscale::utils::is_serializable::value && - !(std::is_integral::value || std::is_floating_point::value), - output_archive& - >::type - serialize(output_archive & ar, T const & t, int) { - allscale::utils::ArchiveWriter writer(ar); - writer.write(t); - return ar; - } - - template - typename std::enable_if< - ::allscale::utils::is_serializable::value && - !(std::is_integral::value || std::is_floating_point::value), - input_archive& - >::type - serialize(input_archive & ar, T & t, int) { - - allscale::utils::ArchiveReader reader(ar); - t = reader.read(); - return ar; - } -} // end namespace serialization -} // end namespace allscale -#endif diff --git a/vendor/allscale/utils/serializer/arrays.h b/vendor/allscale/utils/serializer/arrays.h deleted file mode 100644 index 079405334..000000000 --- a/vendor/allscale/utils/serializer/arrays.h +++ /dev/null @@ -1,60 +0,0 @@ -#pragma once - -#ifdef ALLSCALE_WITH_HPX - #include -#endif - -#include "allscale/utils/serializer.h" - -#include - -namespace allscale { -namespace utils { - - - namespace detail { - - template - struct array_load_helper { - - template - std::array operator()(ArchiveReader& reader, Args&& ... args) { - return array_load_helper()(reader,args...,reader.read()); - } - }; - - template - struct array_load_helper { - - template - std::array operator()(ArchiveReader&, Args&& ... args) { - return std::array{ - { args... } - }; - } - - }; - - } - - - /** - * Add support for serializing / de-serializing arrays. 
- */ - template - struct serializer,typename std::enable_if::value,void>::type> { - - static std::array load(ArchiveReader& reader) { - // support loading of array for elements without default constructor - return detail::array_load_helper()(reader); - } - static void store(ArchiveWriter& writer, const std::array& value) { - for(const auto& cur : value) { - writer.write(cur); - } - } - }; - -} // end namespace utils -} // end namespace allscale - diff --git a/vendor/allscale/utils/serializer/strings.h b/vendor/allscale/utils/serializer/strings.h deleted file mode 100644 index 619b59efd..000000000 --- a/vendor/allscale/utils/serializer/strings.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#ifdef ALLSCALE_WITH_HPX -#include -#endif - -#include "allscale/utils/serializer.h" - -#include - -namespace allscale { -namespace utils { - - /** - * Add support for serializing / de-serializing strings. - */ - template<> - struct serializer { - - static std::string load(ArchiveReader& reader) { - auto size = reader.read(); - std::string res; - res.resize(size); - reader.read(&res[0],size); - return res; - } - static void store(ArchiveWriter& writer, const std::string& value) { - writer.write(value.size()); - writer.write(&value[0],value.size()); - } - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/serializer/vectors.h b/vendor/allscale/utils/serializer/vectors.h deleted file mode 100644 index 81870ed35..000000000 --- a/vendor/allscale/utils/serializer/vectors.h +++ /dev/null @@ -1,52 +0,0 @@ -#pragma once - -#ifdef ALLSCALE_WITH_HPX - #include "allscale/utils/serializer.h" -#endif - -#include - -#include "allscale/utils/serializer/arrays.h" - -namespace allscale { -namespace utils { - - /** - * Add support for serializing / de-serializing std::vectors. 
- */ - template - struct serializer,typename std::enable_if::value,void>::type> { - - static std::vector load(ArchiveReader& reader) { - - // create the result - std::vector res; - - // load the size - auto size = reader.read(); - - // make some space - res.reserve(size); - - // load the elements - for(std::size_t i=0; i()); - } - - // done - return res; - } - static void store(ArchiveWriter& writer, const std::vector& value) { - - // start with the size - writer.write(value.size()); - - // followed by all the elements - for(const auto& cur : value) { - writer.write(cur); - } - } - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/static_grid.h b/vendor/allscale/utils/static_grid.h deleted file mode 100644 index 582282e28..000000000 --- a/vendor/allscale/utils/static_grid.h +++ /dev/null @@ -1,247 +0,0 @@ -#pragma once - -#include -#include - -#include "allscale/utils/functional_utils.h" -#include "allscale/utils/serializer.h" -#include "allscale/utils/vector.h" - -namespace allscale { -namespace utils { - - template - struct StaticGrid; - - template - struct StaticGrid { - using data_type = std::array,a>; - using addr_type = utils::Vector; - - private: - - data_type data; - - template - typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - std::memcpy(&data,&other.data,sizeof(data_type)); - } - - template - typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - data = other.data; - } - - public: - - StaticGrid& operator=(const StaticGrid& other) { - if (this == &other) return *this; - assignInternal(other); - return *this; - } - - Cell& operator[](const addr_type& addr) { - return this->template operator[](addr); - } - - const Cell& operator[](const addr_type& addr) const { - return this->template operator[](addr); - } - - template - Cell& operator[](const utils::Vector& addr) { - allscale_check_bounds((size_t)addr[D - sizeof...(rest)-1], data); - 
return data[addr[D-sizeof...(rest)-1]][addr]; - } - - template - const Cell& operator[](const utils::Vector& addr) const { - allscale_check_bounds((size_t)addr[D - sizeof...(rest)-1], data); - return data[addr[D-sizeof...(rest)-1]][addr]; - } - - utils::Vector size() const { - return { a, rest... }; - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) const { - for(const auto& cur : data) { - cur.forEach(lambda); - } - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) { - for(auto& cur : data) { - cur.forEach(lambda); - } - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) const { - addr_type pos; - _forEachInternal(pos,lambda); - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) { - addr_type pos; - _forEachInternal(pos,lambda); - } - - void store(utils::ArchiveWriter& writer) const { - for(const auto& e : data) { - writer.write(e); - } - } - - static StaticGrid load(utils::ArchiveReader& reader) { - StaticGrid grid; - for(auto& e : grid.data) { - e = reader.read(); - } - return grid; - } - - private: - - template - friend struct StaticGrid; - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) const { - auto& i = pos[D-sizeof...(rest)-1]; - i = 0; - for(const auto& cur : data) { - cur._forEachInternal(pos,lambda); - i++; - } - } - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) { - auto& i = pos[D-sizeof...(rest)-1]; - i = 0; - for(auto& cur : data) { - cur._forEachInternal(pos,lambda); - i++; - } - } - - }; - - template - struct StaticGrid { - using data_type = Cell; - using addr_type = utils::Vector; - - private: - - data_type data; - - template - typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - std::memcpy(&data,&other.data,sizeof(data_type)); - } - - template - 
typename std::enable_if::value,void>::type - assignInternal(const StaticGrid& other) { - data = other.data; - } - - public: - - StaticGrid& operator=(const StaticGrid& other) { - if (this == &other) return *this; - assignInternal(other); - return *this; - } - - Cell& operator[](const addr_type& addr) { - return this->template operator[]<0>(addr); - } - - const Cell& operator[](const addr_type& addr) const { - return this->template operator[]<0>(addr); - } - - template - Cell& operator[](const utils::Vector&) { - return data; - } - - template - const Cell& operator[](const utils::Vector&) const { - return data; - } - - std::size_t size() const { - return 1; - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) const { - lambda(data); - } - - template - std::enable_if_t::arity == 1, void> - forEach(const Lambda& lambda) { - lambda(data); - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) const { - lambda(addr_type(),data); - } - - template - std::enable_if_t::arity == 2, void> - forEach(const Lambda& lambda) { - lambda(addr_type(),data); - } - - void store(utils::ArchiveWriter& writer) const { - writer.write(data); - } - - static StaticGrid load(utils::ArchiveReader& reader) { - StaticGrid grid; - grid.data = std::move(reader.read()); - return grid; - } - - private: - - template - friend struct StaticGrid; - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) const { - lambda(const_cast&>(pos),data); - } - - template - std::enable_if_t::arity == 2, void> - _forEachInternal(utils::Vector& pos, const Lambda& lambda) { - lambda(const_cast&>(pos),data); - } - - }; - -} // end utils -} // end namespace allscale diff --git a/vendor/allscale/utils/static_map.h b/vendor/allscale/utils/static_map.h deleted file mode 100644 index 44e5c6083..000000000 --- a/vendor/allscale/utils/static_map.h +++ /dev/null @@ -1,120 +0,0 @@ -#pragma once - -#include 
-#include - -#include "allscale/utils/type_list.h" - -namespace allscale { -namespace utils { - - // -------------------------------------------------------------------- - // Declarations - // -------------------------------------------------------------------- - - - /** - * A static map mapping a given value to each of a given list of types. - */ - template - class StaticMap; - - /** - * An auxiliary type for forming lists of keys. - */ - template - struct keys {}; - - - // -------------------------------------------------------------------- - // Definitions - // -------------------------------------------------------------------- - - namespace key_utils { - - template - struct is_keys : public std::false_type {}; - - template - struct is_keys> : public std::true_type {}; - - template - struct invalid_key : public std::false_type {}; - } - - template - class StaticMap { - - static_assert(key_utils::is_keys::value, "First template parameters must be of form keys<...>"); - - }; - - - template - class StaticMap,Value> { - - using key_list = type_list; - - std::array values; - - public: - - // -- accessors and mutators -- - - StaticMap(const Value& value) { - for(auto& cur : values) cur = value; - } - - StaticMap() = default; - StaticMap(const StaticMap&) = default; - StaticMap(StaticMap&&) = default; - - StaticMap& operator=(const StaticMap&) = default; - StaticMap& operator=(StaticMap&&) = default; - - // -- accessors and mutators -- - - template - Value& get() { - return values[type_index::value]; - } - - template - const Value& get() const { - return values[type_index::value]; - } - - auto begin() { - return values.begin(); - } - - auto begin() const { - return values.begin(); - } - - auto end() { - return values.end(); - } - - auto end() const { - return values.end(); - } - - template - void forEach(const Body& body) { - for(auto& cur : values) { - body(cur); - } - } - - template - void forEach(const Body& body) const { - for(const auto& cur : values) { - 
body(cur); - } - } - - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/string_utils.h b/vendor/allscale/utils/string_utils.h deleted file mode 100644 index 439931fa5..000000000 --- a/vendor/allscale/utils/string_utils.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include -#include -#include - -template -std::string toString(const T& value) { - std::stringstream res; - res << value; - return res.str(); -} diff --git a/vendor/allscale/utils/table.h b/vendor/allscale/utils/table.h deleted file mode 100644 index 72dfc850a..000000000 --- a/vendor/allscale/utils/table.h +++ /dev/null @@ -1,243 +0,0 @@ -#pragma once - -#include - -#include "allscale/utils/assert.h" -#include "allscale/utils/io_utils.h" -#include "allscale/utils/raw_buffer.h" -#include "allscale/utils/printer/join.h" - -namespace allscale { -namespace utils { - - /** - * A container for a const-sized array of elements, which may or may - * not be owned by instances of this type. 
- */ - template - class Table { - - std::size_t length; - - T* data; - - bool owned; - - public: - - using const_iterator = const T*; - using iterator = T*; - - Table() - : length(0), data(nullptr), owned(false) {} - - Table(std::size_t size) - : length(size), data(allocate(length)), owned(true) { - - // see whether there is something to do - if (std::is_trivially_default_constructible::value) return; - - // use in-place default constructor - for(auto& cur : *this) { - new (&cur) T(); - } - } - - Table(std::size_t size, const T& value) - : length(size), data(allocate(length)), owned(true) { - // use in-place copy constructor - for(auto& cur : *this) { - new (&cur) T(value); - } - } - - Table(T* data, std::size_t size) - : length(size), data(data), owned(false) {} - - Table(T* begin, T* end) - : Table(begin,std::distance(begin,end)) {} - - - Table(const Table& other) - : length(other.length), - data(allocate(length)), - owned(true) { - - // see whether there is something to do - if (length > 0 && std::is_trivially_copy_constructible::value) { - std::memcpy(data,other.data,sizeof(T)*length); - return; - } - - // use in-place constructor to copy data - for(std::size_t i=0; i::value) { - for(auto& cur : *this) { - cur.~T(); - } - } - - // free the owned memory - free(data); - } - - - Table& operator=(const Table& other) { - - // shortcut for stupid stuff - if (this == &other) return *this; - - // free old state - this->~Table(); - - // create a copy of the new state - new (this) Table(other); - - // done - return *this; - } - - Table& operator=(Table&& other) { - - // shortcut for stupid stuff - assert_ne(this,&other) << "Should not be possible!"; - - // free old state - this->~Table(); - - // create a copy of the new state - new (this) Table(std::move(other)); - - // done - return *this; - } - - bool empty() const { - return length == 0; - } - - std::size_t size() const { - return length; - } - - T& operator[](std::size_t i) { - return data[i]; - } - - const T& 
operator[](std::size_t i) const { - return data[i]; - } - - const_iterator begin() const { - return data; - } - - const_iterator cbegin() const { - return data; - } - - iterator begin() { - return data; - } - - const_iterator end() const { - return data + length; - } - - const_iterator cend() const { - return data + length; - } - - iterator end() { - return data + length; - } - - bool isOwner() const { - return owned; - } - - friend std::ostream& operator<<(std::ostream& out, const Table& table) { - return out << "[" << join(",",table) << "]"; - } - - void store(std::ostream& out) const { - // write length and data - write(out,length); - write(out,data,data+length); - - // write padding bytes - forEachPaddingByte([&]{ - write(out,(char)0); - }); - - } - - static Table load(std::istream& in) { - - Table res; - - res.owned = true; - res.length = read(in); - res.data = allocate(res.length); - read(in,res.begin(),res.end()); - - // consume padding bytes - res.forEachPaddingByte([&]{ - read(in); - }); - - return res; - } - - static Table interpret(utils::RawBuffer& buffer) { - - Table res; - res.owned = false; - res.length = buffer.consume(); - res.data = buffer.consumeArray(res.length); - - // consume padding bytes - res.forEachPaddingByte([&]{ - buffer.consume(); - }); - - return res; - - } - - private: - - static T* allocate(std::size_t size) { - if (size == 0) return nullptr; - return reinterpret_cast(malloc(sizeof(T)*size)); - } - - template - void forEachPaddingByte(const Body& body) const { - auto c = (sizeof(T)*length) % 8; - while(c%8 != 0) { - body(); - c++; - } - } - - }; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/tuple_utils.h b/vendor/allscale/utils/tuple_utils.h deleted file mode 100644 index 5bf8ede2c..000000000 --- a/vendor/allscale/utils/tuple_utils.h +++ /dev/null @@ -1,111 +0,0 @@ -#include -#include - -namespace allscale { -namespace utils { - - namespace detail { - - template - struct 
tuple_for_each_helper { - template - void operator()(const Op& op, std::tuple& tuple) { - tuple_for_each_helper()(op,tuple); - op(std::get(tuple)); - } - template - void operator()(const Op& op, const std::tuple& tuple) { - tuple_for_each_helper()(op,tuple); - op(std::get(tuple)); - } - }; - - template<> - struct tuple_for_each_helper<0> { - template - void operator()(const Op&, const std::tuple&) { - // nothing - } - }; - - } - - /** - * A utility to apply an operator on all elements of a tuple in order. - * - * @param tuple the (mutable) tuple - * @param op the operator to be applied - */ - template - void forEach(std::tuple& tuple, const Op& op) { - detail::tuple_for_each_helper()(op,tuple); - } - - /** - * A utility to apply an operator on all elements of a tuple in order. - * - * @param tuple the (constant) tuple - * @param op the operator to be applied - */ - template - void forEach(const std::tuple& tuple, const Op& op) { - detail::tuple_for_each_helper()(op,tuple); - } - - namespace detail { - - template - auto map_helper(const std::tuple& in, const Op& op, std::integer_sequence) { - return std::make_tuple(op(std::get(in))...); - } - - template - auto map_helper(std::tuple& in, const Op& op, std::integer_sequence) { - return std::make_tuple(op(std::get(in))...); - } - - } - - /** - * A utility to apply a transformation on each element of a given tuple and return a a tuple containing - * the results. - * - * @param tuple the (constant) input tuple - * @param op the operation to be applied on each element of the tuple - */ - template - auto map(const std::tuple& tuple, const Op& op) { - return detail::map_helper(tuple,op,std::make_integer_sequence()); - } - - /** - * A utility to apply a transformation on each element of a given tuple and return a a tuple containing - * the results. 
- * - * @param tuple the (mutable) input tuple - * @param op the operation to be applied on each element of the tuple - */ - template - auto map(std::tuple& tuple, const Op& op) { - return detail::map_helper(tuple,op,std::make_integer_sequence()); - } - -} // end namespace utils -} // end namespace allscale - -namespace std { - - template - std::ostream& operator<<(std::ostream& out, const std::tuple& tuple) { - out << "("; - std::size_t count = 0; - const std::size_t numElements = sizeof...(Elements); - allscale::utils::forEach(tuple,[&](const auto& cur) { - out << cur; - count++; - if (count != numElements) out << ","; - }); - return out << ")"; - } - -} diff --git a/vendor/allscale/utils/type_list.h b/vendor/allscale/utils/type_list.h deleted file mode 100644 index 91caa3857..000000000 --- a/vendor/allscale/utils/type_list.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include - -namespace allscale { -namespace utils { - - - // -------------------- Type List traits ---------------------------- - - template - struct type_list { - enum { length = sizeof...(Ts) }; - enum { empty = (length == 0) }; - }; - - - // -- test whether a given list contains a given type -- - - template - struct type_list_contains; - - template - struct type_list_contains> : public std::true_type {}; - - template - struct type_list_contains> : public type_list_contains> {}; - - template - struct type_list_contains> : public std::false_type {}; - - - // -- extracts a type at a given position -- - - template - struct type_at; - - template - struct type_at<0, type_list> { - typedef H type; - }; - - template - struct type_at> { - typedef typename type_at>::type type; - }; - - - // -- obtains the index of a given type -- - - template - struct type_index; - - template - struct type_index> { - enum { value = 0 }; - }; - - template - struct type_index> { - enum { value = type_index>::value + 1 }; - }; - - -} // end namespace utils -} // end namespace allscale diff --git 
a/vendor/allscale/utils/unused.h b/vendor/allscale/utils/unused.h deleted file mode 100644 index 7e696122f..000000000 --- a/vendor/allscale/utils/unused.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -/** - * This header defines a macro to mark knowingly unused variables as being - * unused, so that the compiler is not issuing warnings about those. - */ - -#ifdef __GNUC__ - #define __allscale_unused __attribute__((unused)) -#else - #define __allscale_unused -#endif diff --git a/vendor/allscale/utils/vector.h b/vendor/allscale/utils/vector.h deleted file mode 100644 index 611d9ea4b..000000000 --- a/vendor/allscale/utils/vector.h +++ /dev/null @@ -1,415 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "allscale/utils/printer/arrays.h" -#include "allscale/utils/assert.h" -#include "allscale/utils/unused.h" -#include "allscale/utils/serializer/arrays.h" - -namespace allscale { -namespace utils { - - // generic vector implementation - template - class Vector { - - std::array data; - - public: - - using element_type = T; - - Vector() = default; - - Vector(const T& e) { - data.fill(e); - } - - Vector(const Vector&) = default; - Vector(Vector&&) = default; - - template - Vector(const Vector& other) - : data(other.data) {} - - template - Vector(const std::array& other) - : data(other) {} - - Vector(const std::initializer_list& values) { - assert_eq(Dims,values.size()); - init(values); - } - - template - Vector(T a, T b, Rest ... 
rest) : data{ {a,b,rest...} } { - static_assert(Dims == sizeof...(rest)+2, "Invalid number of components!"); - } - - - Vector& operator=(const Vector& other) = default; - Vector& operator=(Vector&& other) = default; - - T& operator[](const std::size_t index) { - return data[index]; - } - - const T& operator[](const std::size_t index) const { - return data[index]; - } - - // relational operators - // defined in-class, since the private std::array data member has matching operators to forward to - - bool operator==(const Vector& other) const { - return data == other.data; - } - - bool operator!=(const Vector& other) const { - return !(data == other.data); - } - - bool operator<(const Vector& other) const { - return data < other.data; - } - - bool operator<=(const Vector& other) const { - return data <= other.data; - } - - bool operator>=(const Vector& other) const { - return data >= other.data; - } - - bool operator>(const Vector& other) const { - return data > other.data; - } - - // allow implicit casts to std::array - operator const std::array&() const { return data; } - - bool dominatedBy(const Vector& other) const { - for(std::size_t i=0; i& other) const { - for(std::size_t i=0; i - void init_internal(const std::initializer_list& list, const std::integer_sequence&) { - __allscale_unused auto bla = { data[Index] = *(list.begin() + Index) ... 
}; - } - - template - void init(const std::initializer_list& list) { - init_internal(list,std::make_index_sequence()); - } - - }; - - template - Vector& operator+=(Vector& a, const Vector& b) { - for(std::size_t i = 0; i - Vector& operator-=(Vector& a, const Vector& b) { - for(size_t i = 0; i - Vector& operator*=(Vector& a, const S& fac) { - for(size_t i =0; i - Vector& operator/=(Vector& a, const S& fac) { - for(size_t i =0; i - Vector operator+(const Vector& a, const Vector& b) { - Vector res(a); - return res += b; - } - - template - Vector operator-(const Vector& a, const Vector& b) { - Vector res(a); - return res -= b; - } - - template - Vector operator*(const Vector& vec, const S& fac) { - Vector res(vec); - return res *= fac; - } - - template - Vector operator*(const S& fac, const Vector& vec) { - return vec * fac; - } - - template - Vector operator/(const Vector& vec, const S& fac) { - Vector res(vec); - return res /= fac; - } - - template - Vector elementwise(const Vector& a, const Vector& b, const Lambda& op) { - Vector res; - for(unsigned i=0; i - Vector elementwiseMin(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return std::min(a,b); }); - } - - template - Vector elementwiseMax(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return std::max(a,b); }); - } - - template - Vector elementwiseProduct(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return a*b; }); - } - - template - Vector elementwiseDivision(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return a/b; }); - } - - template - Vector elementwiseRemainder(const Vector& a, const Vector& b) { - return elementwise(a,b,[](const T& a, const T& b) { return a % b; }); - } - - template - Vector elementwiseModulo(const Vector& a, const Vector& b) { - return elementwiseRemainder(a,b); - } - - - template - T sumOfSquares(const Vector& vec) { 
- T sum = T(); - for(unsigned i = 0; i < Dims; i++) { - sum += vec[i] * vec[i]; - } - return sum; - } - - // specialization for 3-dimensional vectors, providing access to named data members x, y, z - template - class Vector { - public: - - using element_type = T; - - T x, y, z; - - Vector() = default; - - Vector(const T& e) : x(e), y(e), z(e) { } - - Vector(T x, T y, T z) : x(x), y(y), z(z) { } - - Vector(const Vector&) = default; - Vector(Vector&&) = default; - - template - Vector(const Vector& other) : x(other.x), y(other.y), z(other.z) {} - - template - Vector(const std::array& other) : x(other[0]), y(other[1]), z(other[2]) {} - - T& operator[](std::size_t i) { - return (i==0) ? x : (i==1) ? y : z; - } - - const T& operator[](std::size_t i) const { - return (i==0) ? x : (i==1) ? y : z; - } - - Vector& operator=(const Vector& other) = default; - Vector& operator=(Vector&& other) = default; - - bool operator==(const Vector& other) const { - return std::tie(x,y,z) == std::tie(other.x,other.y,other.z); - } - - bool operator!=(const Vector& other) const { - return !(*this == other); - } - - bool operator<(const Vector& other) const { - return asArray() < other.asArray(); - } - - bool operator<=(const Vector& other) const { - return asArray() <= other.asArray(); - } - - bool operator>=(const Vector& other) const { - return asArray() >= other.asArray(); - } - - bool operator>(const Vector& other) const { - return asArray() > other.asArray(); - } - - operator const std::array&() const { return asArray(); } - - const std::array& asArray() const { - return reinterpret_cast&>(*this); - } - - bool dominatedBy(const Vector& other) const { - return other.x >= x && other.y >= y && other.z >= z; - } - - bool strictlyDominatedBy(const Vector& other) const { - return other.x > x && other.y > y && other.z > z; - } - - // Adds printer support to this vector. 
- friend std::ostream& operator<<(std::ostream& out, const Vector& vec) { - return out << "[" << vec.x << "," << vec.y << "," << vec.z << "]"; - } - - }; - - template - Vector crossProduct(const Vector& a, const Vector& b) { - return Vector { - a[1] * b[2] - a[2] * b[1], - a[2] * b[0] - a[0] * b[2], - a[0] * b[1] - a[1] * b[0] - }; - } - - // specialization for 2-dimensional vectors, providing access to named data members x, y - template - class Vector { - public: - - using element_type = T; - - T x, y; - - Vector() = default; - - Vector(const T& e) : x(e), y(e) { } - - Vector(T x, T y) : x(x), y(y) { } - - Vector(const Vector&) = default; - Vector(Vector&&) = default; - - template - Vector(const Vector& other) : x(other.x), y(other.y) {} - - template - Vector(const std::array& other) : x(other[0]), y(other[1]) {} - - T& operator[](std::size_t i) { - return (i == 0) ? x : y; - } - - const T& operator[](std::size_t i) const { - return (i == 0) ? x : y; - } - - Vector& operator=(const Vector& other) = default; - Vector& operator=(Vector&& other) = default; - - bool operator==(const Vector& other) const { - return asArray() == other.asArray(); - } - - bool operator!=(const Vector& other) const { - return !(*this == other); - } - - bool operator<(const Vector& other) const { - return asArray() < other.asArray(); - } - - bool operator<=(const Vector& other) const { - return asArray() <= other.asArray(); - } - - bool operator>=(const Vector& other) const { - return asArray() >= other.asArray(); - } - - bool operator>(const Vector& other) const { - return asArray() > other.asArray(); - } - - operator const std::array&() const { return asArray(); } - - const std::array& asArray() const { - return reinterpret_cast&>(*this); - } - - bool dominatedBy(const Vector& other) const { - return other.x >= x && other.y >= y; - } - - bool strictlyDominatedBy(const Vector& other) const { - return other.x > x && other.y > y; - } - - // Adds printer support to this vector. 
- friend std::ostream& operator<<(std::ostream& out, const Vector& vec) { - return out << "[" << vec.x << "," << vec.y << "]"; - } - - }; - - /** - * Add support for serializing / de-serializing Vector instances. - * The implementation is simply re-using the serializing capabilities of arrays. - */ - template - struct serializer,typename std::enable_if::value,void>::type> : public serializer> {}; - -} // end namespace utils -} // end namespace allscale diff --git a/vendor/allscale/utils/vector_utils.h b/vendor/allscale/utils/vector_utils.h deleted file mode 100644 index 7707eac06..000000000 --- a/vendor/allscale/utils/vector_utils.h +++ /dev/null @@ -1,78 +0,0 @@ -#pragma once - -#include - -namespace allscale { -namespace utils { - - namespace { - - /** - * The terminal case of a function where a variable number of arguments is written into a vector in proper order. - * - * @tparam T the element type maintained within the extended vector - * @param vector the vector to which nothing is written to - */ - template - inline void appendToVector(std::vector&) {} - - /** - * A variable-argument function writing elements into a vector in the given order. - * - * @tparam T the type of element maintained within the modified vector - * @tparam Elements the types of the remaining elements (need to be convertible to T) - * @param vector the vector to be written to - * @param first the next element to be added - * @param rest the remaining elements to be added - */ - template - inline void appendToVector(std::vector& vector, const T& first, const Elements& ... rest) { - vector.push_back(first); - appendToVector(vector, rest...); - } - - } - - /** - * Create an empty vector containing no elements. - * - * @tparam T the type of element to be stored in the resulting vector - * @return the resulting vector - */ - template - inline std::vector toVector() { - return std::vector (); - } - - /** - * Creates a vector containing the given elements. 
- * - * @tparam T the type of element to be stored in the resulting vector - * @tparam Elements the types of the remaining elements (need to be convertible to T) - * @param first the first element to be within the list - * @param rest the remaining elements to be stored within the list - * @return the resulting vector - */ - template - inline std::vector toVector(const T& first, const Elements& ... rest) { - std::vector res; - res.reserve(1 + sizeof...(rest)); - appendToVector(res, first, rest...); - return res; - } - - - template - struct is_vector : public std::false_type {}; - - template - struct is_vector> : public std::true_type {}; - - template - struct is_vector : public is_vector {}; - - template - struct is_vector : public is_vector {}; - -} // end namespace utils -} // end namespace allscale