From ddd0189f6d5c0448bd43d914f425a2eb4b4bf75c Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Mon, 15 Jul 2024 15:47:03 -0700 Subject: [PATCH 01/39] Use wasm32/wasm64 instead of le32/le64 le32/le64 has been removed from LLVM19, which is bad for Halide. This attempts to use wasm32/wasm64 targets instead. --- Makefile | 4 ++-- src/runtime/CMakeLists.txt | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 5d23cfd197f7..46f5a23dc151 100644 --- a/Makefile +++ b/Makefile @@ -1054,8 +1054,8 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # (The 'nacl' is a red herring. This is just a generic 32-bit little-endian target.) -RUNTIME_TRIPLE_32 = "le32-unknown-nacl-unknown" -RUNTIME_TRIPLE_64 = "le64-unknown-unknown-unknown" +RUNTIME_TRIPLE_32 = "wasm32-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 5426c355823c..04d0d82e7ae9 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -217,17 +217,18 @@ foreach (i IN LISTS RUNTIME_CPP) # unfortunately, clang doesn't automatically set this flag even though the # ABI is msvc on windows set(fshort-wchar -fshort-wchar) - set(TARGET "le64-unknown-windows-unknown") + # TODO: was le64 here, not sure if this is correct or not + set(TARGET "x86_64-unknown-windows-unknown") endif () endif() # Everything else else() if (j EQUAL 32) - # (The 'nacl' is a red herring. This is just a generic 32-bit little-endian target.) - set(TARGET "le32-unknown-nacl-unknown") + # generic 32-bit code + set(TARGET "wasm32-unknown-unknown-unknown") else () # generic 64-bit code - set(TARGET "le64-unknown-unknown-unknown") + set(TARGET "wasm64-unknown-unknown-unknown") endif () endif () From 60b46ee5e61e6a00c5c73c04587f7b44bfde740b Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Mon, 15 Jul 2024 15:57:11 -0700 Subject: [PATCH 02/39] Update Makefile --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 46f5a23dc151..5db867878e51 100644 --- a/Makefile +++ b/Makefile @@ -1068,7 +1068,8 @@ RUNTIME_TRIPLE_WIN_X86_32 = "i386-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_X86_64 = "x86_64-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_32 = "arm-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" -RUNTIME_TRIPLE_WIN_GENERIC_64 = "le64-unknown-windows-unknown" +# TODO: was le64 here, not sure if this is correct or not +RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" # `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++ # standard but still skip threadsafe guards for static initialization in our runtime code) From 3cce91fad8207cbd05bd575b0c0b0203ed50f0a6 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Mon, 15 Jul 2024 16:04:26 -0700 Subject: [PATCH 03/39] Update CMakeLists.txt --- src/runtime/CMakeLists.txt | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 04d0d82e7ae9..cf4762f90576 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -217,18 +217,32 @@ foreach (i IN LISTS RUNTIME_CPP) # unfortunately, clang doesn't automatically set this flag even though the # ABI is msvc on windows set(fshort-wchar -fshort-wchar) - # TODO: was le64 here, not sure if this is correct or not - set(TARGET "x86_64-unknown-windows-unknown") + if (LLVM_PACKAGE_VERSION VERSION_LESS 19.0) + set(TARGET "le64-unknown-windows-unknown") + else () + # TODO: was le64 here, not sure if this is correct or not + set(TARGET "x86_64-unknown-windows-unknown") + endif () endif () endif() # Everything else else() - if (j EQUAL 32) - # generic 32-bit code - set(TARGET "wasm32-unknown-unknown-unknown") + if (LLVM_PACKAGE_VERSION VERSION_LESS 19.0) + if (j EQUAL 32) + # generic 32-bit code + set(TARGET "wasm32-unknown-unknown-unknown") + else () + # generic 64-bit code + set(TARGET "wasm64-unknown-unknown-unknown") + endif () else () - # generic 64-bit code - set(TARGET "wasm64-unknown-unknown-unknown") + if (j EQUAL 32) + # generic 32-bit code + set(TARGET "le32-unknown-unknown-unknown") + else () + # generic 64-bit code + set(TARGET "le64-unknown-unknown-unknown") + endif () endif () endif () From 24fd1b200049fa99c3e7c6eecceba89099409e65 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Mon, 15 Jul 2024 16:05:11 -0700 Subject: [PATCH 04/39] Update CMakeLists.txt --- src/runtime/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index cf4762f90576..f64852a70bb7 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -230,18 +230,18 @@ foreach (i IN LISTS RUNTIME_CPP) if (LLVM_PACKAGE_VERSION VERSION_LESS 19.0) if (j EQUAL 32) # generic 32-bit code - set(TARGET "wasm32-unknown-unknown-unknown") + set(TARGET "le32-unknown-nacl-unknown") else () # generic 64-bit code - set(TARGET "wasm64-unknown-unknown-unknown") + set(TARGET "le64-unknown-unknown-unknown") endif () else () if (j EQUAL 32) # generic 32-bit code - set(TARGET "le32-unknown-unknown-unknown") + set(TARGET "wasm32-unknown-unknown-unknown") else () # generic 64-bit code - set(TARGET "le64-unknown-unknown-unknown") + set(TARGET "wasm64-unknown-unknown-unknown") endif () endif () endif () From 67d80ae4c83fb29df9d0ad0c2c1cbd8da232ea0f Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Mon, 15 Jul 2024 16:09:14 -0700 Subject: [PATCH 05/39] trigger buildbots From 301c6102b0898ea29ff56b802e84cb1ff67fc23d Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 09:20:58 -0700 Subject: [PATCH 06/39] Try x86 instead of wasm --- Makefile | 6 +++--- src/runtime/CMakeLists.txt | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 5db867878e51..f9efb64bcf3d 100644 --- a/Makefile +++ b/Makefile @@ -1053,9 +1053,9 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp .SECONDARY: # Compile generic 32- or 64-bit code -# (The 'nacl' is a red herring. This is just a generic 32-bit little-endian target.) -RUNTIME_TRIPLE_32 = "wasm32-unknown-unknown-unknown" -RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" +# (The 'x86_64' is a red herring. This is just a generic 32-bit little-endian target.) +RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index f64852a70bb7..48f946b00de9 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -238,10 +238,10 @@ foreach (i IN LISTS RUNTIME_CPP) else () if (j EQUAL 32) # generic 32-bit code - set(TARGET "wasm32-unknown-unknown-unknown") + set(TARGET "i386-unknown-unknown-unknown") else () # generic 64-bit code - set(TARGET "wasm64-unknown-unknown-unknown") + set(TARGET "x86_64-unknown-unknown-unknown") endif () endif () endif () From 09fba5f9134fd20539a22673bbbc70e634484d6d Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 10:28:49 -0700 Subject: [PATCH 07/39] -Wno-sync-alignment --- Makefile | 3 ++- src/runtime/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f9efb64bcf3d..5e67e4888f58 100644 --- a/Makefile +++ b/Makefile @@ -1094,7 +1094,8 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare + -Wsign-compare \ + -Wno-sync-alignment $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 48f946b00de9..c76c482e07ad 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,6 +173,7 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough + -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From af4af9176a2338a58810ffdfc2c5f42312dd75f6 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 16 Jul 2024 13:38:22 -0700 Subject: [PATCH 08/39] Revert "-Wno-sync-alignment" This reverts commit 09fba5f9134fd20539a22673bbbc70e634484d6d. --- Makefile | 3 +-- src/runtime/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5e67e4888f58..f9efb64bcf3d 100644 --- a/Makefile +++ b/Makefile @@ -1094,8 +1094,7 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare \ - -Wno-sync-alignment + -Wsign-compare $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index c76c482e07ad..48f946b00de9 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,7 +173,6 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough - -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From 80027293f741a3dc0e41c55e998f9273d86210c2 Mon Sep 17 00:00:00 2001 From: Andrew Adams Date: Tue, 16 Jul 2024 13:43:24 -0700 Subject: [PATCH 09/39] Don't do 64-bit atomics in 32-bit runtimes. The 64-bit atomics were all on amounts of memory, so a uintptr_t should be safe. One bit of dodginess is that memory_total could possibly exceed 32-bits while there never being more than 32-bits of memory allocated at any one time. --- src/runtime/profiler_common.cpp | 27 +++++++++++++++------------ src/runtime/runtime_atomics.h | 13 +++++++++++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/runtime/profiler_common.cpp b/src/runtime/profiler_common.cpp index a5633898b8a9..fe78a029965d 100644 --- a/src/runtime/profiler_common.cpp +++ b/src/runtime/profiler_common.cpp @@ -180,11 +180,10 @@ WEAK void sampling_profiler_thread(void *) { namespace { -template -void sync_compare_max_and_swap(T *ptr, T val) { +void sync_compare_max_and_swap(uintptr_t *ptr, uintptr_t val) { using namespace Halide::Runtime::Internal::Synchronization; - T old_val = *ptr; + uintptr_t old_val = *ptr; while (val > old_val) { if (atomic_cas_strong_sequentially_consistent(ptr, &old_val, &val)) { return; @@ -350,7 +349,11 @@ WEAK void halide_profiler_stack_peak_update(void *user_context, // Update per-func memory stats for (int i = 0; i < instance->pipeline_stats->num_funcs; ++i) { if (f_values[i] != 0) { - sync_compare_max_and_swap(&(instance->funcs[i]).stack_peak, f_values[i]); + // On 32-bit platforms we don't want to use 64-bit + // atomics. Fortunately on these platforms memory usage fits into + // 32-bit integers. + sync_compare_max_and_swap((uintptr_t *)(&(instance->funcs[i]).stack_peak), + (uintptr_t)(f_values[i])); } } } @@ -382,15 +385,15 @@ WEAK void halide_profiler_memory_allocate(void *user_context, // Update per-instance memory stats atomic_add_fetch_sequentially_consistent(&instance->num_allocs, 1); - atomic_add_fetch_sequentially_consistent(&instance->memory_total, incr); - uint64_t p_mem_current = atomic_add_fetch_sequentially_consistent(&instance->memory_current, incr); - sync_compare_max_and_swap(&instance->memory_peak, p_mem_current); + atomic_add_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_total), (uintptr_t)incr); + uint64_t p_mem_current = atomic_add_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_current), (uintptr_t)incr); + sync_compare_max_and_swap((uintptr_t *)(&instance->memory_peak), (uintptr_t)p_mem_current); // Update per-func memory stats atomic_add_fetch_sequentially_consistent(&func->num_allocs, 1); - atomic_add_fetch_sequentially_consistent(&func->memory_total, incr); - uint64_t f_mem_current = atomic_add_fetch_sequentially_consistent(&func->memory_current, incr); - sync_compare_max_and_swap(&func->memory_peak, f_mem_current); + atomic_add_fetch_sequentially_consistent((uintptr_t *)(&func->memory_total), (uintptr_t)incr); + uint64_t f_mem_current = atomic_add_fetch_sequentially_consistent((uintptr_t *)(&func->memory_current), (uintptr_t)incr); + sync_compare_max_and_swap((uintptr_t *)(&func->memory_peak), (uintptr_t)f_mem_current); } WEAK void halide_profiler_memory_free(void *user_context, @@ -418,10 +421,10 @@ WEAK void halide_profiler_memory_free(void *user_context, // unless user specifically calls halide_profiler_reset(). // Update per-pipeline memory stats - atomic_sub_fetch_sequentially_consistent(&instance->memory_current, decr); + atomic_sub_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_current), (uintptr_t)decr); // Update per-func memory stats - atomic_sub_fetch_sequentially_consistent(&func->memory_current, decr); + atomic_sub_fetch_sequentially_consistent((uintptr_t *)(&func->memory_current), (uintptr_t)decr); } WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) { diff --git a/src/runtime/runtime_atomics.h b/src/runtime/runtime_atomics.h index 61139a622d75..255dbee30f3b 100644 --- a/src/runtime/runtime_atomics.h +++ b/src/runtime/runtime_atomics.h @@ -35,36 +35,43 @@ ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) template ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) { + static_assert(sizeof(T) == 4); return __sync_fetch_and_add(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) { + static_assert(sizeof(T) == 4); return __sync_fetch_and_add(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) { + static_assert(sizeof(T) == 4); return __sync_fetch_and_sub(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) { + static_assert(sizeof(T) == 4); return __sync_fetch_and_or(addr, val); } template ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) { + static_assert(sizeof(T) == 4); return __sync_add_and_fetch(addr, val); } template ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) { + static_assert(sizeof(T) == 4); return __sync_sub_and_fetch(addr, val); } template::type> ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, TV *expected, TV *desired) { + static_assert(sizeof(T) == 4); TV oldval = *expected; TV gotval = __sync_val_compare_and_swap(addr, oldval, *desired); *expected = gotval; @@ -99,11 +106,13 @@ ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *e template ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) { + static_assert(sizeof(T) == 4); return __sync_fetch_and_and(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) { + static_assert(sizeof(T) == 4); return __sync_fetch_and_and(addr, val); } @@ -121,6 +130,7 @@ ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) { template ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) { // Despite the name, this is really just an exchange operation with acquire ordering. + static_assert(sizeof(T) == 4); return __sync_lock_test_and_set(addr, val); } @@ -130,17 +140,20 @@ ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) template ALWAYS_INLINE void atomic_store_relaxed(T *addr, T *val) { + static_assert(sizeof(T) == 4); *addr = *val; } template ALWAYS_INLINE void atomic_store_release(T *addr, T *val) { + static_assert(sizeof(T) == 4); *addr = *val; __sync_synchronize(); } template::type> ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) { + static_assert(sizeof(T) == 4); *addr = *val; __sync_synchronize(); } From 30a755c9b15079f38e5562685acbc305bf5f43b1 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 18:19:20 -0700 Subject: [PATCH 10/39] Reapply "-Wno-sync-alignment" This reverts commit af4af9176a2338a58810ffdfc2c5f42312dd75f6. --- Makefile | 3 ++- src/runtime/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f9efb64bcf3d..5e67e4888f58 100644 --- a/Makefile +++ b/Makefile @@ -1094,7 +1094,8 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare + -Wsign-compare \ + -Wno-sync-alignment $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 48f946b00de9..c76c482e07ad 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,6 +173,7 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough + -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From 2cfd588e78047f6c9d9303699631ef272a8cb1d4 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 18:19:24 -0700 Subject: [PATCH 11/39] Revert "Don't do 64-bit atomics in 32-bit runtimes." This reverts commit 80027293f741a3dc0e41c55e998f9273d86210c2. --- src/runtime/profiler_common.cpp | 27 ++++++++++++--------------- src/runtime/runtime_atomics.h | 13 ------------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/src/runtime/profiler_common.cpp b/src/runtime/profiler_common.cpp index fe78a029965d..a5633898b8a9 100644 --- a/src/runtime/profiler_common.cpp +++ b/src/runtime/profiler_common.cpp @@ -180,10 +180,11 @@ WEAK void sampling_profiler_thread(void *) { namespace { -void sync_compare_max_and_swap(uintptr_t *ptr, uintptr_t val) { +template +void sync_compare_max_and_swap(T *ptr, T val) { using namespace Halide::Runtime::Internal::Synchronization; - uintptr_t old_val = *ptr; + T old_val = *ptr; while (val > old_val) { if (atomic_cas_strong_sequentially_consistent(ptr, &old_val, &val)) { return; @@ -349,11 +350,7 @@ WEAK void halide_profiler_stack_peak_update(void *user_context, // Update per-func memory stats for (int i = 0; i < instance->pipeline_stats->num_funcs; ++i) { if (f_values[i] != 0) { - // On 32-bit platforms we don't want to use 64-bit - // atomics. Fortunately on these platforms memory usage fits into - // 32-bit integers. - sync_compare_max_and_swap((uintptr_t *)(&(instance->funcs[i]).stack_peak), - (uintptr_t)(f_values[i])); + sync_compare_max_and_swap(&(instance->funcs[i]).stack_peak, f_values[i]); } } } @@ -385,15 +382,15 @@ WEAK void halide_profiler_memory_allocate(void *user_context, // Update per-instance memory stats atomic_add_fetch_sequentially_consistent(&instance->num_allocs, 1); - atomic_add_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_total), (uintptr_t)incr); - uint64_t p_mem_current = atomic_add_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_current), (uintptr_t)incr); - sync_compare_max_and_swap((uintptr_t *)(&instance->memory_peak), (uintptr_t)p_mem_current); + atomic_add_fetch_sequentially_consistent(&instance->memory_total, incr); + uint64_t p_mem_current = atomic_add_fetch_sequentially_consistent(&instance->memory_current, incr); + sync_compare_max_and_swap(&instance->memory_peak, p_mem_current); // Update per-func memory stats atomic_add_fetch_sequentially_consistent(&func->num_allocs, 1); - atomic_add_fetch_sequentially_consistent((uintptr_t *)(&func->memory_total), (uintptr_t)incr); - uint64_t f_mem_current = atomic_add_fetch_sequentially_consistent((uintptr_t *)(&func->memory_current), (uintptr_t)incr); - sync_compare_max_and_swap((uintptr_t *)(&func->memory_peak), (uintptr_t)f_mem_current); + atomic_add_fetch_sequentially_consistent(&func->memory_total, incr); + uint64_t f_mem_current = atomic_add_fetch_sequentially_consistent(&func->memory_current, incr); + sync_compare_max_and_swap(&func->memory_peak, f_mem_current); } WEAK void halide_profiler_memory_free(void *user_context, @@ -421,10 +418,10 @@ WEAK void halide_profiler_memory_free(void *user_context, // unless user specifically calls halide_profiler_reset(). // Update per-pipeline memory stats - atomic_sub_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_current), (uintptr_t)decr); + atomic_sub_fetch_sequentially_consistent(&instance->memory_current, decr); // Update per-func memory stats - atomic_sub_fetch_sequentially_consistent((uintptr_t *)(&func->memory_current), (uintptr_t)decr); + atomic_sub_fetch_sequentially_consistent(&func->memory_current, decr); } WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) { diff --git a/src/runtime/runtime_atomics.h b/src/runtime/runtime_atomics.h index 255dbee30f3b..61139a622d75 100644 --- a/src/runtime/runtime_atomics.h +++ b/src/runtime/runtime_atomics.h @@ -35,43 +35,36 @@ ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) template ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) { - static_assert(sizeof(T) == 4); return __sync_fetch_and_add(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) { - static_assert(sizeof(T) == 4); return __sync_fetch_and_add(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) { - static_assert(sizeof(T) == 4); return __sync_fetch_and_sub(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) { - static_assert(sizeof(T) == 4); return __sync_fetch_and_or(addr, val); } template ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) { - static_assert(sizeof(T) == 4); return __sync_add_and_fetch(addr, val); } template ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) { - static_assert(sizeof(T) == 4); return __sync_sub_and_fetch(addr, val); } template::type> ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, TV *expected, TV *desired) { - static_assert(sizeof(T) == 4); TV oldval = *expected; TV gotval = __sync_val_compare_and_swap(addr, oldval, *desired); *expected = gotval; @@ -106,13 +99,11 @@ ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *e template ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) { - static_assert(sizeof(T) == 4); return __sync_fetch_and_and(addr, val); } template::type> ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) { - static_assert(sizeof(T) == 4); return __sync_fetch_and_and(addr, val); } @@ -130,7 +121,6 @@ ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) { template ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) { // Despite the name, this is really just an exchange operation with acquire ordering. - static_assert(sizeof(T) == 4); return __sync_lock_test_and_set(addr, val); } @@ -140,20 +130,17 @@ ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) template ALWAYS_INLINE void atomic_store_relaxed(T *addr, T *val) { - static_assert(sizeof(T) == 4); *addr = *val; } template ALWAYS_INLINE void atomic_store_release(T *addr, T *val) { - static_assert(sizeof(T) == 4); *addr = *val; __sync_synchronize(); } template::type> ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) { - static_assert(sizeof(T) == 4); *addr = *val; __sync_synchronize(); } From 9325a2bd0d1202e62dabb2bbc82969c43c09e45a Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 18:21:50 -0700 Subject: [PATCH 12/39] try arm instead --- Makefile | 8 ++++---- src/runtime/CMakeLists.txt | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 5e67e4888f58..493b0e8b43e2 100644 --- a/Makefile +++ b/Makefile @@ -1053,9 +1053,9 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp .SECONDARY: # Compile generic 32- or 64-bit code -# (The 'x86_64' is a red herring. This is just a generic 32-bit little-endian target.) -RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" -RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" +# (The 'arm' is a red herring. This is just a generic 32-bit little-endian target.) +RUNTIME_TRIPLE_32 = "arm-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "aarch64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm @@ -1069,7 +1069,7 @@ RUNTIME_TRIPLE_WIN_X86_64 = "x86_64-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_32 = "arm-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not -RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" +RUNTIME_TRIPLE_WIN_GENERIC_64 = "aarch64-unknown-windows-unknown" # `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++ # standard but still skip threadsafe guards for static initialization in our runtime code) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index c76c482e07ad..ed27276584b3 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -222,7 +222,7 @@ foreach (i IN LISTS RUNTIME_CPP) set(TARGET "le64-unknown-windows-unknown") else () # TODO: was le64 here, not sure if this is correct or not - set(TARGET "x86_64-unknown-windows-unknown") + set(TARGET "aarch64-unknown-windows-unknown") endif () endif () endif() @@ -239,10 +239,10 @@ foreach (i IN LISTS RUNTIME_CPP) else () if (j EQUAL 32) # generic 32-bit code - set(TARGET "i386-unknown-unknown-unknown") + set(TARGET "arm-unknown-unknown-unknown") else () # generic 64-bit code - set(TARGET "x86_64-unknown-unknown-unknown") + set(TARGET "aarch64-unknown-unknown-unknown") endif () endif () endif () From a9b0b6136e75dbb5edbe84669c705df361daa5a7 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 18:25:40 -0700 Subject: [PATCH 13/39] sync warning --- Makefile | 3 +-- src/runtime/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 493b0e8b43e2..fe51bea72b06 100644 --- a/Makefile +++ b/Makefile @@ -1094,8 +1094,7 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare \ - -Wno-sync-alignment + -Wsign-compare $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index ed27276584b3..f4d2b5bc6e3f 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,7 +173,6 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough - -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From 4e5bd58cd500fe6dc71e3f9be55878ac43001635 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 18:48:06 -0700 Subject: [PATCH 14/39] gnueabihf --- Makefile | 2 +- src/runtime/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fe51bea72b06..e9223e138282 100644 --- a/Makefile +++ b/Makefile @@ -1054,7 +1054,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # (The 'arm' is a red herring. This is just a generic 32-bit little-endian target.) -RUNTIME_TRIPLE_32 = "arm-unknown-unknown-unknown" +RUNTIME_TRIPLE_32 = "arm-unknown-unknown-unknown-gnueabihf" RUNTIME_TRIPLE_64 = "aarch64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index f4d2b5bc6e3f..ef160e681217 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -238,7 +238,7 @@ foreach (i IN LISTS RUNTIME_CPP) else () if (j EQUAL 32) # generic 32-bit code - set(TARGET "arm-unknown-unknown-unknown") + set(TARGET "arm-unknown-unknown-unknown-gnueabihf") else () # generic 64-bit code set(TARGET "aarch64-unknown-unknown-unknown") From bfbe7189804fa91fcc5584ea67a87d167f60a646 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 16 Jul 2024 18:54:17 -0700 Subject: [PATCH 15/39] gnueabihf --- Makefile | 2 +- src/runtime/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e9223e138282..521280bc0602 100644 --- a/Makefile +++ b/Makefile @@ -1054,7 +1054,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # (The 'arm' is a red herring. This is just a generic 32-bit little-endian target.) -RUNTIME_TRIPLE_32 = "arm-unknown-unknown-unknown-gnueabihf" +RUNTIME_TRIPLE_32 = "arm-unknown-unknown-gnueabihf" RUNTIME_TRIPLE_64 = "aarch64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index ef160e681217..f5c3fa732e0e 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -238,7 +238,7 @@ foreach (i IN LISTS RUNTIME_CPP) else () if (j EQUAL 32) # generic 32-bit code - set(TARGET "arm-unknown-unknown-unknown-gnueabihf") + set(TARGET "arm-unknown-unknown-gnueabihf") else () # generic 64-bit code set(TARGET "aarch64-unknown-unknown-unknown") From 2e8a6e81e07665dc87db396b0c3056b20a7c8168 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 17 Jul 2024 10:07:48 -0700 Subject: [PATCH 16/39] Comments --- Makefile | 2 +- src/runtime/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 521280bc0602..733c5fedb7a1 100644 --- a/Makefile +++ b/Makefile @@ -1053,7 +1053,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp .SECONDARY: # Compile generic 32- or 64-bit code -# (The 'arm' is a red herring. This is just a generic 32-bit little-endian target.) +# Don't be fooled: arm/aarch64 are just generic 32/64-bit targets for our purposes here RUNTIME_TRIPLE_32 = "arm-unknown-unknown-gnueabihf" RUNTIME_TRIPLE_64 = "aarch64-unknown-unknown-unknown" diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index f5c3fa732e0e..58d254db58bf 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -236,11 +236,10 @@ foreach (i IN LISTS RUNTIME_CPP) set(TARGET "le64-unknown-unknown-unknown") endif () else () + # don't be fooled: arm/aarch64 are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) - # generic 32-bit code set(TARGET "arm-unknown-unknown-gnueabihf") else () - # generic 64-bit code set(TARGET "aarch64-unknown-unknown-unknown") endif () endif () From 64017558bb9220813d7a31f594421b97b6f9bb65 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 17 Jul 2024 11:34:04 -0700 Subject: [PATCH 17/39] Update CMakeLists.txt --- src/runtime/CMakeLists.txt | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 58d254db58bf..80192c7c9f32 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -217,31 +217,17 @@ foreach (i IN LISTS RUNTIME_CPP) # unfortunately, clang doesn't automatically set this flag even though the # ABI is msvc on windows set(fshort-wchar -fshort-wchar) - if (LLVM_PACKAGE_VERSION VERSION_LESS 19.0) - set(TARGET "le64-unknown-windows-unknown") - else () - # TODO: was le64 here, not sure if this is correct or not - set(TARGET "aarch64-unknown-windows-unknown") - endif () + # TODO: was le64 here, not sure if this is correct or not + set(TARGET "aarch64-unknown-windows-unknown") endif () endif() # Everything else else() - if (LLVM_PACKAGE_VERSION VERSION_LESS 19.0) - if (j EQUAL 32) - # generic 32-bit code - set(TARGET "le32-unknown-nacl-unknown") - else () - # generic 64-bit code - set(TARGET "le64-unknown-unknown-unknown") - endif () + # don't be fooled: arm/aarch64 are just generic 32/64-bit targets for our purposes here + if (j EQUAL 32) + set(TARGET "arm-unknown-unknown-gnueabihf") else () - # don't be fooled: arm/aarch64 are just generic 32/64-bit targets for our purposes here - if (j EQUAL 32) - set(TARGET "arm-unknown-unknown-gnueabihf") - else () - set(TARGET "aarch64-unknown-unknown-unknown") - endif () + set(TARGET "aarch64-unknown-unknown-unknown") endif () endif () From def7da020b433a2de5266aec4290eb601c1edec1 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 17 Jul 2024 14:28:12 -0700 Subject: [PATCH 18/39] Let's try x86 instead --- Makefile | 6 +++--- src/runtime/CMakeLists.txt | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 733c5fedb7a1..39bcd6d2c427 100644 --- a/Makefile +++ b/Makefile @@ -1053,9 +1053,9 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp .SECONDARY: # Compile generic 32- or 64-bit code -# Don't be fooled: arm/aarch64 are just generic 32/64-bit targets for our purposes here -RUNTIME_TRIPLE_32 = "arm-unknown-unknown-gnueabihf" -RUNTIME_TRIPLE_64 = "aarch64-unknown-unknown-unknown" +# Don't be fooled: i386/x86_64 are just generic 32/64-bit targets for our purposes here +RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 80192c7c9f32..c2bdb6793e2d 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -218,16 +218,16 @@ foreach (i IN LISTS RUNTIME_CPP) # ABI is msvc on windows set(fshort-wchar -fshort-wchar) # TODO: was le64 here, not sure if this is correct or not - set(TARGET "aarch64-unknown-windows-unknown") + set(TARGET "x86_64-unknown-windows-unknown") endif () endif() # Everything else else() - # don't be fooled: arm/aarch64 are just generic 32/64-bit targets for our purposes here + # don't be fooled: i386/x86_64 are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) - set(TARGET "arm-unknown-unknown-gnueabihf") + set(TARGET "i386-unknown-unknown-unknown") else () - set(TARGET "aarch64-unknown-unknown-unknown") + set(TARGET "x86_64-unknown-unknown-unknown") endif () endif () From 0e766ae49b43fbe38058889592de804baa0ebd49 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 17 Jul 2024 14:31:35 -0700 Subject: [PATCH 19/39] -Wno-sync-alignment --- Makefile | 3 ++- src/runtime/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 39bcd6d2c427..222d86f4410d 100644 --- a/Makefile +++ b/Makefile @@ -1094,7 +1094,8 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare + -Wsign-compare \ + -Wno-sync-alignment $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index c2bdb6793e2d..82c060ad0d5c 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,6 +173,7 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough + -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From 3e99fb84557f251c9aff67e2d0fbf89268edd764 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 18 Jul 2024 10:27:02 -0700 Subject: [PATCH 20/39] Back to wasm --- Makefile | 6 +++--- src/runtime/CMakeLists.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 222d86f4410d..d6f173028249 100644 --- a/Makefile +++ b/Makefile @@ -1053,9 +1053,9 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp .SECONDARY: # Compile generic 32- or 64-bit code -# Don't be fooled: i386/x86_64 are just generic 32/64-bit targets for our purposes here -RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" -RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" +# Don't be fooled: these are just generic 32/64-bit targets for our purposes here +RUNTIME_TRIPLE_32 = "wasm32-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 82c060ad0d5c..d9d0b5d39e3a 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -224,11 +224,11 @@ foreach (i IN LISTS RUNTIME_CPP) endif() # Everything else else() - # don't be fooled: i386/x86_64 are just generic 32/64-bit targets for our purposes here + # don't be fooled: these are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) - set(TARGET "i386-unknown-unknown-unknown") + set(TARGET "wasm32-unknown-unknown-unknown") else () - set(TARGET "x86_64-unknown-unknown-unknown") + set(TARGET "wasm64-unknown-unknown-unknown") endif () endif () From 678164f79889a32df22de0a80b46b00846ec3e47 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 18 Jul 2024 11:29:46 -0700 Subject: [PATCH 21/39] Update LLVM_Runtime_Linker.cpp --- src/LLVM_Runtime_Linker.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index e601a3325ce6..634587f50fb5 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -715,6 +715,10 @@ void link_modules(std::vector> &modules, Target t, convert_weak_to_linkonce(f); } } + // Ensure it isn't hidden. + if (is_halide_extern_c_sym) { + f.setVisibility(llvm::GlobalValue::DefaultVisibility); + } // Windows requires every symbol that's going to get merged // has a comdat that specifies how. The linkage type alone From 0c050b2e9f27cbc685acdac92e95ce08cf42b9f2 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Fri, 19 Jul 2024 08:00:33 -0700 Subject: [PATCH 22/39] Fix RUNTIME_TRIPLE_WIN_GENERIC_64 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d6f173028249..ddf1d1a8995a 100644 --- a/Makefile +++ b/Makefile @@ -1069,7 +1069,7 @@ RUNTIME_TRIPLE_WIN_X86_64 = "x86_64-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_32 = "arm-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not -RUNTIME_TRIPLE_WIN_GENERIC_64 = "aarch64-unknown-windows-unknown" +RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" # `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++ # standard but still skip threadsafe guards for static initialization in our runtime code) From 3ca975b25df5422a33c1db94ca6102babfa03847 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Fri, 19 Jul 2024 08:02:06 -0700 Subject: [PATCH 23/39] Reenable warning --- Makefile | 3 +-- src/runtime/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index ddf1d1a8995a..73b6758a05c9 100644 --- a/Makefile +++ b/Makefile @@ -1094,8 +1094,7 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare \ - -Wno-sync-alignment + -Wsign-compare $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index d9d0b5d39e3a..ab6781c90708 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,7 +173,6 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough - -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From 622c9cd032ba469261f275fba99110110172266f Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 23 Jul 2024 10:31:51 -0700 Subject: [PATCH 24/39] Update LLVM_Runtime_Linker.cpp --- src/LLVM_Runtime_Linker.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index 634587f50fb5..2a4f0bda4608 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -709,16 +709,16 @@ void link_modules(std::vector> &modules, Target t, // convert all of those to plain external. if (f.getLinkage() == llvm::GlobalValue::ExternalWeakLinkage) { f.setLinkage(llvm::GlobalValue::ExternalLinkage); + // Ensure it isn't hidden. + if (is_halide_extern_c_sym) { + f.setVisibility(llvm::GlobalValue::DefaultVisibility); + } } else { const bool can_strip = !is_halide_extern_c_sym; if (can_strip || allow_stripping_all_weak_functions) { convert_weak_to_linkonce(f); } } - // Ensure it isn't hidden. - if (is_halide_extern_c_sym) { - f.setVisibility(llvm::GlobalValue::DefaultVisibility); - } // Windows requires every symbol that's going to get merged // has a comdat that specifies how. The linkage type alone From 9885b306024249cf2c4dd8c296ac5c32821f5ea2 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 23 Jul 2024 13:08:54 -0700 Subject: [PATCH 25/39] Update LLVM_Runtime_Linker.cpp --- src/LLVM_Runtime_Linker.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index 2a4f0bda4608..ef1087578a13 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -709,10 +709,9 @@ void link_modules(std::vector> &modules, Target t, // convert all of those to plain external. if (f.getLinkage() == llvm::GlobalValue::ExternalWeakLinkage) { f.setLinkage(llvm::GlobalValue::ExternalLinkage); - // Ensure it isn't hidden. - if (is_halide_extern_c_sym) { - f.setVisibility(llvm::GlobalValue::DefaultVisibility); - } + } else if (f.getLinkage() == llvm::GlobalValue::WeakAnyLinkage && is_halide_extern_c_sym && t.arch == Target::X86 && t.bits == 32) { + // Special case for x86-32 + f.setLinkage(llvm::GlobalValue::ExternalLinkage); } else { const bool can_strip = !is_halide_extern_c_sym; if (can_strip || allow_stripping_all_weak_functions) { @@ -720,6 +719,11 @@ void link_modules(std::vector> &modules, Target t, } } + // Ensure it isn't hidden. + if (is_halide_extern_c_sym && f.isWeakForLinker() && f.getVisibility() == llvm::GlobalValue::HiddenVisibility) { + f.setVisibility(llvm::GlobalValue::DefaultVisibility); + } + // Windows requires every symbol that's going to get merged // has a comdat that specifies how. The linkage type alone // isn't enough. From 5b0ccc0842b8f8b98ce92cf57553302ff6fbbdf2 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 23 Jul 2024 14:58:05 -0700 Subject: [PATCH 26/39] Update LLVM_Runtime_Linker.cpp --- src/LLVM_Runtime_Linker.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index ef1087578a13..c18ab4bc4f39 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -709,9 +709,6 @@ void link_modules(std::vector> &modules, Target t, // convert all of those to plain external. if (f.getLinkage() == llvm::GlobalValue::ExternalWeakLinkage) { f.setLinkage(llvm::GlobalValue::ExternalLinkage); - } else if (f.getLinkage() == llvm::GlobalValue::WeakAnyLinkage && is_halide_extern_c_sym && t.arch == Target::X86 && t.bits == 32) { - // Special case for x86-32 - f.setLinkage(llvm::GlobalValue::ExternalLinkage); } else { const bool can_strip = !is_halide_extern_c_sym; if (can_strip || allow_stripping_all_weak_functions) { From fe54bfe783651045688f396ad80b571b3198701a Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 24 Jul 2024 08:12:18 -0700 Subject: [PATCH 27/39] Fixes --- Makefile | 2 +- src/runtime/CMakeLists.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8a54910c1421..9abe28ab71a4 100644 --- a/Makefile +++ b/Makefile @@ -1054,7 +1054,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # Don't be fooled: these are just generic 32/64-bit targets for our purposes here -RUNTIME_TRIPLE_32 = "wasm32-unknown-unknown-unknown" +RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index ab6781c90708..5a3f0cfaae51 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -225,7 +225,8 @@ foreach (i IN LISTS RUNTIME_CPP) else() # don't be fooled: these are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) - set(TARGET "wasm32-unknown-unknown-unknown") + # wasm32 will fail for some i386 builds, but i386 won't + set(TARGET "i386-unknown-unknown-unknown") else () set(TARGET "wasm64-unknown-unknown-unknown") endif () From b28acbc1e277e20a71d7e55156b5b320b7f0eac0 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 24 Jul 2024 08:25:43 -0700 Subject: [PATCH 28/39] warnings --- Makefile | 3 ++- src/runtime/CMakeLists.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9abe28ab71a4..09ea42d7b242 100644 --- a/Makefile +++ b/Makefile @@ -1094,7 +1094,8 @@ RUNTIME_CXX_FLAGS = \ -Wno-unknown-warning-option \ -Wno-unused-function \ -Wvla \ - -Wsign-compare + -Wsign-compare \ + -Wno-sync-alignment $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 5a3f0cfaae51..98e131e1e3cb 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -173,6 +173,7 @@ set(RUNTIME_CXX_FLAGS -Wvla -Wsign-compare -Wimplicit-fallthrough + -Wno-sync-alignment ) option(Halide_CLANG_TIDY_BUILD "Generate fake compile jobs for runtime files when running clang-tidy." OFF) From 97046ce7ee17a7f038ff7c3a0a73e3b964ba1bb6 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 24 Jul 2024 10:51:33 -0700 Subject: [PATCH 29/39] wasm64->x86_64 --- Makefile | 2 +- src/LLVM_Runtime_Linker.cpp | 5 ----- src/runtime/CMakeLists.txt | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 09ea42d7b242..12f661e520d5 100644 --- a/Makefile +++ b/Makefile @@ -1055,7 +1055,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # Don't be fooled: these are just generic 32/64-bit targets for our purposes here RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" -RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index c18ab4bc4f39..e601a3325ce6 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -716,11 +716,6 @@ void link_modules(std::vector> &modules, Target t, } } - // Ensure it isn't hidden. - if (is_halide_extern_c_sym && f.isWeakForLinker() && f.getVisibility() == llvm::GlobalValue::HiddenVisibility) { - f.setVisibility(llvm::GlobalValue::DefaultVisibility); - } - // Windows requires every symbol that's going to get merged // has a comdat that specifies how. The linkage type alone // isn't enough. diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 98e131e1e3cb..93221ec1ed11 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -229,7 +229,7 @@ foreach (i IN LISTS RUNTIME_CPP) # wasm32 will fail for some i386 builds, but i386 won't set(TARGET "i386-unknown-unknown-unknown") else () - set(TARGET "wasm64-unknown-unknown-unknown") + set(TARGET "x86_64-unknown-unknown-unknown") endif () endif () From 007186b71565648b583eef50692dd5a09febc8bc Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Wed, 24 Jul 2024 13:59:49 -0700 Subject: [PATCH 30/39] back to wasm64 --- Makefile | 2 +- src/runtime/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 12f661e520d5..09ea42d7b242 100644 --- a/Makefile +++ b/Makefile @@ -1055,7 +1055,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # Don't be fooled: these are just generic 32/64-bit targets for our purposes here RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" -RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 93221ec1ed11..98e131e1e3cb 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -229,7 +229,7 @@ foreach (i IN LISTS RUNTIME_CPP) # wasm32 will fail for some i386 builds, but i386 won't set(TARGET "i386-unknown-unknown-unknown") else () - set(TARGET "x86_64-unknown-unknown-unknown") + set(TARGET "wasm64-unknown-unknown-unknown") endif () endif () From 38f9ab6bb0ca6772dc6d6cdffe48d626e199d6dc Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 10:30:29 -0700 Subject: [PATCH 31/39] Use wasm32/wasm64 for webgpu --- Makefile | 19 +++++++++++++++++++ src/runtime/CMakeLists.txt | 10 ++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 09ea42d7b242..c36ee78eb8e9 100644 --- a/Makefile +++ b/Makefile @@ -1071,6 +1071,9 @@ RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" +RUNTIME_TRIPLE_WEBGPU_32 = "wasm32-unknown-unknown-unknown" +RUNTIME_TRIPLE_WEBGPU_64 = "wasm64-unknown-unknown-unknown" + # `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++ # standard but still skip threadsafe guards for static initialization in our runtime code) # @@ -1121,6 +1124,22 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_D @mkdir -p $(@D) $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d +$(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d + +$(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d + +$(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d + +$(BUILD_DIR)/initmod.webgpu_%_64_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d + $(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.$*_64.d diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 98e131e1e3cb..c0142bef4375 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -222,14 +222,20 @@ foreach (i IN LISTS RUNTIME_CPP) set(TARGET "x86_64-unknown-windows-unknown") endif () endif() - # Everything else + elseif (i MATCHES "webgpu") + if (j EQUAL 32) + # wasm32 will fail for some i386 builds, but i386 won't + set(TARGET "wasm32-unknown-unknown-unknown") + else () + set(TARGET "wasm64-unknown-unknown-unknown") + endif () else() # don't be fooled: these are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) # wasm32 will fail for some i386 builds, but i386 won't set(TARGET "i386-unknown-unknown-unknown") else () - set(TARGET "wasm64-unknown-unknown-unknown") + set(TARGET "x86_64-unknown-unknown-unknown") endif () endif () From 2d5033bc2aed4d93749ba4c993e31236587c5c43 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 10:38:48 -0700 Subject: [PATCH 32/39] Update Makefile --- Makefile | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index c36ee78eb8e9..8ce5ed31cc3d 100644 --- a/Makefile +++ b/Makefile @@ -1070,7 +1070,6 @@ RUNTIME_TRIPLE_WIN_ARM_32 = "arm-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" - RUNTIME_TRIPLE_WEBGPU_32 = "wasm32-unknown-unknown-unknown" RUNTIME_TRIPLE_WEBGPU_64 = "wasm64-unknown-unknown-unknown" @@ -1125,20 +1124,20 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_D $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d $(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d $(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d $(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d $(BUILD_DIR)/initmod.webgpu_%_64_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d $(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) From 39cd51dba28e7f1397dea13ec647d47cb5bd941b Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 11:02:17 -0700 Subject: [PATCH 33/39] trigger buildbots From 55c3610f0717501ae96d7855231623272ab75010 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 15:40:35 -0700 Subject: [PATCH 34/39] Revert "Update Makefile" This reverts commit 2d5033bc2aed4d93749ba4c993e31236587c5c43. --- Makefile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 8ce5ed31cc3d..c36ee78eb8e9 100644 --- a/Makefile +++ b/Makefile @@ -1070,6 +1070,7 @@ RUNTIME_TRIPLE_WIN_ARM_32 = "arm-unknown-windows-unknown" RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" + RUNTIME_TRIPLE_WEBGPU_32 = "wasm32-unknown-unknown-unknown" RUNTIME_TRIPLE_WEBGPU_64 = "wasm64-unknown-unknown-unknown" @@ -1124,20 +1125,20 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_D $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d $(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d $(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d $(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d $(BUILD_DIR)/initmod.webgpu_%_64_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d $(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) From a2064e451724f1bd33a3cbec94dc4a623cf4e8ba Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 15:40:39 -0700 Subject: [PATCH 35/39] Revert "Use wasm32/wasm64 for webgpu" This reverts commit 38f9ab6bb0ca6772dc6d6cdffe48d626e199d6dc. --- Makefile | 19 ------------------- src/runtime/CMakeLists.txt | 10 ++-------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index c36ee78eb8e9..09ea42d7b242 100644 --- a/Makefile +++ b/Makefile @@ -1071,9 +1071,6 @@ RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" -RUNTIME_TRIPLE_WEBGPU_32 = "wasm32-unknown-unknown-unknown" -RUNTIME_TRIPLE_WEBGPU_64 = "wasm64-unknown-unknown-unknown" - # `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++ # standard but still skip threadsafe guards for static initialization in our runtime code) # @@ -1124,22 +1121,6 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_D @mkdir -p $(@D) $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d -$(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d - -$(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d - -$(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d - -$(BUILD_DIR)/initmod.webgpu_%_64_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d - $(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.$*_64.d diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index c0142bef4375..98e131e1e3cb 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -222,20 +222,14 @@ foreach (i IN LISTS RUNTIME_CPP) set(TARGET "x86_64-unknown-windows-unknown") endif () endif() - elseif (i MATCHES "webgpu") - if (j EQUAL 32) - # wasm32 will fail for some i386 builds, but i386 won't - set(TARGET "wasm32-unknown-unknown-unknown") - else () - set(TARGET "wasm64-unknown-unknown-unknown") - endif () + # Everything else else() # don't be fooled: these are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) # wasm32 will fail for some i386 builds, but i386 won't set(TARGET "i386-unknown-unknown-unknown") else () - set(TARGET "x86_64-unknown-unknown-unknown") + set(TARGET "wasm64-unknown-unknown-unknown") endif () endif () From 09c86a2f89fc8c7f31545ed1a3bcd09f630be200 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 15:40:48 -0700 Subject: [PATCH 36/39] Revert "back to wasm64" This reverts commit 007186b71565648b583eef50692dd5a09febc8bc. --- Makefile | 2 +- src/runtime/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 09ea42d7b242..12f661e520d5 100644 --- a/Makefile +++ b/Makefile @@ -1055,7 +1055,7 @@ $(BIN_DIR)/build_halide_h: $(ROOT_DIR)/tools/build_halide_h.cpp # Compile generic 32- or 64-bit code # Don't be fooled: these are just generic 32/64-bit targets for our purposes here RUNTIME_TRIPLE_32 = "i386-unknown-unknown-unknown" -RUNTIME_TRIPLE_64 = "wasm64-unknown-unknown-unknown" +RUNTIME_TRIPLE_64 = "x86_64-unknown-unknown-unknown" # Windows requires special handling. The generic windows_* modules must have -fpic elided # and (for 64 bit) must set wchar to be 2 bytes. The windows_*_x86 and windows_*_arm diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 98e131e1e3cb..93221ec1ed11 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -229,7 +229,7 @@ foreach (i IN LISTS RUNTIME_CPP) # wasm32 will fail for some i386 builds, but i386 won't set(TARGET "i386-unknown-unknown-unknown") else () - set(TARGET "wasm64-unknown-unknown-unknown") + set(TARGET "x86_64-unknown-unknown-unknown") endif () endif () From abbc1cd189652605186e1f413f77c6c1b877df4a Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Thu, 25 Jul 2024 15:41:28 -0700 Subject: [PATCH 37/39] -fno-jump-tables --- Makefile | 1 + src/runtime/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 12f661e520d5..3b01862d6919 100644 --- a/Makefile +++ b/Makefile @@ -1085,6 +1085,7 @@ RUNTIME_CXX_FLAGS = \ -fno-vectorize \ -fno-threadsafe-statics \ -fno-rtti \ + -fno-jump-tables \ -Wall \ -Wcast-qual \ -Werror \ diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 93221ec1ed11..4d6d5f44c9b9 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -161,6 +161,8 @@ set(RUNTIME_CXX_FLAGS -fno-threadsafe-statics # Necessary for using virtual functions in the runtime code. -fno-rtti + # Will generate bad code in some situations + -fno-jump-tables -Wall -Wc++20-designator -Wcast-qual From b9497bdade717b580e29a90baf6e5f175ec03421 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Fri, 26 Jul 2024 08:35:16 -0700 Subject: [PATCH 38/39] Reapply "Use wasm32/wasm64 for webgpu" This reverts commit a2064e451724f1bd33a3cbec94dc4a623cf4e8ba. --- Makefile | 19 +++++++++++++++++++ src/runtime/CMakeLists.txt | 8 +++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3b01862d6919..dddcc0fe28d6 100644 --- a/Makefile +++ b/Makefile @@ -1071,6 +1071,9 @@ RUNTIME_TRIPLE_WIN_ARM_64 = "aarch64-unknown-windows-unknown" # TODO: was le64 here, not sure if this is correct or not RUNTIME_TRIPLE_WIN_GENERIC_64 = "x86_64-unknown-windows-unknown" +RUNTIME_TRIPLE_WEBGPU_32 = "wasm32-unknown-unknown-unknown" +RUNTIME_TRIPLE_WEBGPU_64 = "wasm64-unknown-unknown-unknown" + # `-fno-threadsafe-statics` is very important here (note that it allows us to use a 'modern' C++ # standard but still skip threadsafe guards for static initialization in our runtime code) # @@ -1122,6 +1125,22 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_D @mkdir -p $(@D) $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d +$(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d + +$(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d + +$(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d + +$(BUILD_DIR)/initmod.webgpu_%_64_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d + $(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D) $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -fpic -m64 -target $(RUNTIME_TRIPLE_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.$*_64.d diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 4d6d5f44c9b9..fba385a85ee7 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -224,7 +224,13 @@ foreach (i IN LISTS RUNTIME_CPP) set(TARGET "x86_64-unknown-windows-unknown") endif () endif() - # Everything else + elseif (i MATCHES "webgpu") + if (j EQUAL 32) + # wasm32 will fail for some i386 builds, but i386 won't + set(TARGET "wasm32-unknown-unknown-unknown") + else () + set(TARGET "wasm64-unknown-unknown-unknown") + endif () else() # don't be fooled: these are just generic 32/64-bit targets for our purposes here if (j EQUAL 32) From d6affabb6c9421e0f24e72db75627b7eb78ea1ee Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Fri, 26 Jul 2024 08:42:28 -0700 Subject: [PATCH 39/39] tabs --- Makefile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index dddcc0fe28d6..b84f7ea1f7fc 100644 --- a/Makefile +++ b/Makefile @@ -1126,20 +1126,20 @@ $(BUILD_DIR)/initmod.windows_%_64.ll: $(SRC_DIR)/runtime/windows_%.cpp $(BUILD_D $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WIN_GENERIC_64) -fshort-wchar -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/windows_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.windows_$*_64.d $(BUILD_DIR)/initmod.webgpu_%_32.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32.d $(BUILD_DIR)/initmod.webgpu_%_64.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64.d $(BUILD_DIR)/initmod.webgpu_%_32_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m32 -target $(RUNTIME_TRIPLE_WEBGPU_32) -DCOMPILING_HALIDE_RUNTIME -DBITS_32 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_32_debug.d $(BUILD_DIR)/initmod.webgpu_%_64_debug.ll: $(SRC_DIR)/runtime/webgpu_%.cpp $(BUILD_DIR)/clang_ok - @mkdir -p $(@D) - $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d + @mkdir -p $(@D) + $(CLANG) $(CXX_WARNING_FLAGS) -g -DDEBUG_RUNTIME $(RUNTIME_CXX_FLAGS) -m64 -target $(RUNTIME_TRIPLE_WEBGPU_64) -DCOMPILING_HALIDE_RUNTIME -DBITS_64 -emit-llvm -S $(SRC_DIR)/runtime/webgpu_$*.cpp -o $@ -MMD -MP -MF $(BUILD_DIR)/initmod.webgpu_$*_64_debug.d $(BUILD_DIR)/initmod.%_64.ll: $(SRC_DIR)/runtime/%.cpp $(BUILD_DIR)/clang_ok @mkdir -p $(@D)