diff --git a/taichi/backends/opengl/codegen_opengl.cpp b/taichi/backends/opengl/codegen_opengl.cpp index ad244b017c1b0e..e4e9aeeeb632f4 100644 --- a/taichi/backends/opengl/codegen_opengl.cpp +++ b/taichi/backends/opengl/codegen_opengl.cpp @@ -61,33 +61,36 @@ class KernelGen : public IRVisitor { std::string kernel_name, StructCompiledResult *struct_compiled) : kernel(kernel), - compiled_program_(std::make_unique(kernel)), struct_compiled_(struct_compiled), kernel_name_(kernel_name), glsl_kernel_prefix_(kernel_name), + compiled_program_(std::make_unique(kernel)), ps(std::make_unique(0)) { allow_undefined_visitor = true; invoke_default_visitor = true; } private: - std::unique_ptr compiled_program_; - + // constants: StructCompiledResult *struct_compiled_; const SNode *root_snode_; GetRootStmt *root_stmt_; std::string kernel_name_; - std::string glsl_kernel_name_; std::string root_snode_type_name_; std::string glsl_kernel_prefix_; - int glsl_kernel_count_{0}; + // throughout variables: + int glsl_kernel_count_{0}; bool is_top_level_{true}; - bool is_grid_stride_loop_{false}; + std::unique_ptr compiled_program_; + UsedFeature used; // TODO: is this actually per-offload? 
+ + // per-offload variables: LineAppender line_appender_; LineAppender line_appender_header_; + std::string glsl_kernel_name_; std::unique_ptr ps; - UsedFeature used; + bool is_grid_stride_loop_{false}; size_t max_tls_size{0}; template @@ -166,18 +169,6 @@ class KernelGen : public IRVisitor { kernel_header += "layout(std430, binding = 3) buffer earg_i32 { int _earg_i32_[]; };\n"; } - if (used.buf_thls) { - kernel_header += - fmt::format("int _thls_i32_[{}];\n", max_tls_size); - kernel_header += - fmt::format("float _thls_f32_[{}];\n", max_tls_size); - if (used.float64) - kernel_header += - fmt::format("double _thls_f64_[{}];\n", max_tls_size); - if (used.int64) - kernel_header += - fmt::format("int64_t _thls_i64_[{}];\n", max_tls_size); - } if (used.buf_extr) { kernel_header += "layout(std430, binding = 4) buffer extr_i32 { int _extr_i32_[]; };\n" @@ -187,6 +178,18 @@ class KernelGen : public IRVisitor { if (used.int64) kernel_header += "layout(std430, binding = 4) buffer extr_i64 { int64_t _extr_i64_[]; };\n"; } + if (max_tls_size != 0) { + kernel_header += + fmt::format("int _tls_i32_[{}];\n", max_tls_size); + kernel_header += + fmt::format("float _tls_f32_[{}];\n", max_tls_size); + if (used.float64) + kernel_header += + fmt::format("double _tls_f64_[{}];\n", max_tls_size); + if (used.int64) + kernel_header += + fmt::format("int64_t _tls_i64_[{}];\n", max_tls_size); + } // clang-format on if (used.simulated_atomic_float) { kernel_header += ( @@ -237,6 +240,7 @@ class KernelGen : public IRVisitor { line_appender_header_.clear_all(); line_appender_.clear_all(); ps = std::make_unique(0); + max_tls_size = 0; } void visit(Block *stmt) override { @@ -861,10 +865,10 @@ class KernelGen : public IRVisitor { void visit(ThreadLocalPtrStmt *stmt) override { TI_ASSERT(stmt->width() == 1); - used.buf_thls = true; - max_tls_size = stmt->offset + 1; + // Grow only: plain assignment would shrink the _tls_* arrays if a later stmt has a smaller offset. + if (stmt->offset + 1 > max_tls_size) max_tls_size = stmt->offset + 1; emit("int {} = {};", stmt->short_name(), stmt->offset); - ptr_signats[stmt->id] = "thls"; + ptr_signats[stmt->id] = "tls"; } void 
visit(LoopIndexStmt *stmt) override { diff --git a/taichi/backends/opengl/opengl_kernel_util.h b/taichi/backends/opengl/opengl_kernel_util.h index 594f3229d4aa2f..5dd62233b70a46 100644 --- a/taichi/backends/opengl/opengl_kernel_util.h +++ b/taichi/backends/opengl/opengl_kernel_util.h @@ -26,7 +26,6 @@ struct UsedFeature { bool buf_earg{false}; bool buf_extr{false}; bool buf_gtmp{false}; - bool buf_thls{false}; // utilties: bool fast_pow{false};