[T 11/17/21 18:54:36.325 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@54] initialize_opengl(true) called
[D 11/17/21 18:54:36.920 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@97] OpenGL context loaded through GLFW
[D 11/17/21 18:54:36.920 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@187] OpenGL version 4.3
[T 11/17/21 18:54:36.920 12928] [taichi/inc/opengl_extension.inc.h:taichi::lang::opengl::initialize_opengl@3] [glsl] Found GL_ARB_compute_shader
[T 11/17/21 18:54:36.920 12928] [taichi/inc/opengl_extension.inc.h:taichi::lang::opengl::initialize_opengl@4] [glsl] Found GL_ARB_gpu_shader_int64
[T 11/17/21 18:54:36.922 12928] [taichi/inc/opengl_extension.inc.h:taichi::lang::opengl::initialize_opengl@5] [glsl] Found GL_NV_shader_atomic_float
[T 11/17/21 18:54:36.922 12928] [taichi/inc/opengl_extension.inc.h:taichi::lang::opengl::initialize_opengl@6] [glsl] Found GL_NV_shader_atomic_float64
[T 11/17/21 18:54:36.922 12928] [taichi/inc/opengl_extension.inc.h:taichi::lang::opengl::initialize_opengl@7] [glsl] Found GL_NV_shader_atomic_int64
[T 11/17/21 18:54:36.922 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@206] GL_MAX_COMPUTE_WORK_GROUP_COUNT: 2147483647
[T 11/17/21 18:54:36.922 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@209] GL_MAX_COMPUTE_WORK_GROUP_SIZE: 1536
[T 11/17/21 18:54:36.922 12928] [program.cpp:taichi::lang::Program::Program@46] Program initializing...
[T 11/17/21 18:54:36.922 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@54] initialize_opengl(true) called
[T 11/17/21 18:54:36.922 12928] [memory_pool.cpp:taichi::lang::MemoryPool::MemoryPool@13] Memory pool created. Default buffer size per allocator = 1024 MB
[T 11/17/21 18:54:36.922 12928] [program.cpp:taichi::lang::Program::Program@159] Program (0x233bdfe5d80) arch=opengl initialized.
[T 11/17/21 18:54:36.923 12928] [__init__.py:init@548] Materializing runtime...
[T 11/17/21 18:54:36.923 12928] [unified_allocator.cpp:taichi::lang::UnifiedAllocator::UnifiedAllocator@32] Allocating virtual address space of size 1024 MB
[T 11/17/21 18:54:36.923 12928] [unified_allocator.cpp:taichi::lang::UnifiedAllocator::UnifiedAllocator@41] Memory allocated. Allocation time = 2.90e-05 s
[T 11/17/21 18:54:36.923 12928] [taichi/system/unified_allocator.h:taichi::lang::UnifiedAllocator::allocate@39] UM [data=2421380022272] allocate() request=256 remain=1073741824
[T 11/17/21 18:54:36.923 12928] [opengl_api.cpp:taichi::lang::opengl::initialize_opengl@54] initialize_opengl(false) called
[T 11/17/21 18:54:36.929 12928] [opengl_program.cpp:taichi::lang::OpenglProgramImpl::compile_snode_tree_types@42] OpenGL root buffer size: 8388608 B
[T 11/17/21 18:54:36.933 12928] [kernel_impl.py:materialize@459] Compiling kernel initialize_c48_0...
[T 11/17/21 18:54:36.942 12928] [constant_fold.cpp:taichi::lang::ConstantFold::get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=18256548145024532736
[D 11/17/21 18:54:36.942 12928] [opengl_api.cpp:taichi::lang::opengl::CompiledProgram::add@257] [glsl]
compiling kernel jit_evaluator_00<<<1, 1>>>:
#version 430 core
// Compute shader with a 1x1x1 local work-group size.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
precision highp float;
// Binding 0 is declared but never referenced in this shader.
layout(std430, binding = 0) buffer data_f32 { float _data_f32_[]; };
// Binding 2 is both read (inputs) and written (result) by the kernel below.
layout(std430, binding = 2) buffer args_f32 { float _args_f32_[]; };

// Neither of these two constants is referenced in this shader.
const float inf = 1.0f / 0.0f;
const float nan = 0.0f / 0.0f;
// Multiplies two float arguments and writes the product back into the
// argument buffer.
void jit_evaluator_00()
{ // serial
  // Operands are read from float indices 0 (0 << 1) and 2 (1 << 1), i.e. the
  // arguments are two floats apart.
  // NOTE(review): presumably each argument occupies an 8-byte slot - confirm.
  float B = _args_f32_[0 << 1];
  float C = _args_f32_[1 << 1];
  float D = B * C;
  // In GLSL `+` binds tighter than `>>`, so the index parses as
  // 320 >> (2 + 0) = 80. That equals (320 >> 2) + 0 only because the added
  // term is 0; with a non-zero term the two readings would differ.
  // NOTE(review): 320 looks like a byte offset being converted to a 4-byte
  // element index - confirm against the code generator.
  _args_f32_[320 >> 2 + 0] = D;
}

// Entry point: runs the kernel body once per invocation.
void main()
{
  jit_evaluator_00();
}

[D 11/17/21 18:54:36.946 12928] [opengl_api.cpp:taichi::lang::opengl::CompiledProgram::add@257] [glsl]
compiling kernel initialize_c48_00<<<8, 128>>>:
#version 430 core
// Compute shader with 128 invocations per work group along x.
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
precision highp float;
// Binding 0 is the only buffer; this kernel only writes to it.
layout(std430, binding = 0) buffer data_f32 { float _data_f32_[]; };

// Neither of these two constants is referenced in this shader.
const float inf = 1.0f / 0.0f;
const float nan = 0.0f / 0.0f;
// For every i in [0, 1024) and j in [0, 768), stores
//   sin(float(i) * 0.31415927) * sin(float(j) * 0.62831855)
// at _data_f32_[(i & 1023) * 1024 + (j & 1023)].
void initialize_c48_00()
{ // range for
  // range known at compile time
  // Each invocation starts at its global x index and advances by the total
  // number of invocations in the dispatch, so the 1024 outer iterations are
  // shared out across all invocations.
  int _sid0 = int(gl_GlobalInvocationID.x);
  for (int _sid = _sid0; _sid < (1024); _sid += int(gl_WorkGroupSize.x * gl_NumWorkGroups.x)) {
    int _itv = 0 + _sid;
      // Bp = shift that scales a row index by 1024; Bf = 10-bit index mask.
      int Bp = int(10);
      int Bf = int(1023);
      // B scales the inner index j before its sin().
      float B = float(0.62831855);
      int C = _itv;
      float D = float(C);
      // E scales the outer index i before its sin().
      float E = float(0.31415927);
      float F = D * E;
      // G depends only on the outer index, so it is computed once per i.
      float G = float(sin(F));
      // H and I are the inner loop bounds [0, 768). H is also reused as the
      // multiplier of 8388608 in the address computation below.
      int H = int(0);
      int I = int(768);
      // Bs = (i & 1023) * 1024: element offset of row i.
      int Br = C & Bf;
      int Bs = Br << Bp;
      for (int J_ = H; J_ < I; J_ += 1) {
        int J = J_;
        int K = J;
        float L = float(K);
        float M = L * B;
        float N = float(sin(M));
        float O = G * N;
        // Address arithmetic is done in bytes and converted back to a float
        // index by the final `>> 2`.
        int AZ = 0;
        // H is 0, so this term contributes nothing.
        int B1 = AZ + 8388608 * H; // S0
        int B2 = B1 + 0; // S1
        int Bg = K & Bf;
        // Bo = row offset + column = element index of cell (i, j).
        int Bo = Bg + Bs;
        int B6 = B2 + 4 * Bo; // S1
        int B7 = B6 + 0; // S2
        _data_f32_[B7 >> 2] = O;
      }
  }
}

// Entry point: runs the kernel body once per invocation.
void main()
{
  initialize_c48_00();
}

[Taichi] version 0.8.5, llvm 10.0.0, commit 45c6ad48, win, python 3.9.7
[Taichi] Starting on arch=opengl
Wait...[T 11/17/21 18:54:36.947 12928] [kernel_impl.py:materialize@459] Compiling kernel compute_c50_0...
[T 11/17/21 18:54:36.956 12928] [constant_fold.cpp:taichi::lang::ConstantFold::get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=18256548145091904783
[D 11/17/21 18:54:36.956 12928] [opengl_api.cpp:taichi::lang::opengl::CompiledProgram::add@257] [glsl]
compiling kernel jit_evaluator_10<<<1, 1>>>:
#version 430 core
// Compute shader with a 1x1x1 local work-group size.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
precision highp float;
// Binding 0 is declared but never referenced in this shader.
layout(std430, binding = 0) buffer data_i32 { int _data_i32_[]; };
// Binding 2 is both read (inputs) and written (result) by the kernel below.
layout(std430, binding = 2) buffer args_i32 { int _args_i32_[]; };

// Neither of these two constants is referenced in this shader.
const float inf = 1.0f / 0.0f;
const float nan = 0.0f / 0.0f;
// Compares two int arguments and writes -1 when the first is less than the
// second, 0 otherwise, back into the argument buffer.
void jit_evaluator_10()
{ // serial
  // Operands are read from int indices 0 (0 << 1) and 2 (1 << 1), i.e. the
  // arguments are two ints apart.
  // NOTE(review): presumably each argument occupies an 8-byte slot - confirm.
  int B = _args_i32_[0 << 1];
  int C = _args_i32_[1 << 1];
  // int(bool) is 1 or 0, so the negation yields -1 (all bits set) or 0.
  int D = -int(B < C);
  // In GLSL `+` binds tighter than `>>`, so the index parses as
  // 320 >> (2 + 0) = 80. That equals (320 >> 2) + 0 only because the added
  // term is 0; with a non-zero term the two readings would differ.
  // NOTE(review): 320 looks like a byte offset being converted to a 4-byte
  // element index - confirm against the code generator.
  _args_i32_[320 >> 2 + 0] = D;
}

// Entry point: runs the kernel body once per invocation.
void main()
{
  jit_evaluator_10();
}

[T 11/17/21 18:54:36.958 12928] [constant_fold.cpp:taichi::lang::ConstantFold::get_jit_evaluator_kernel@68] Saving JIT evaluator cache entry id=18256548145091904777
[D 11/17/21 18:54:36.958 12928] [opengl_api.cpp:taichi::lang::opengl::CompiledProgram::add@257] [glsl]
compiling kernel jit_evaluator_20<<<1, 1>>>:
#version 430 core
// Compute shader with a 1x1x1 local work-group size.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
precision highp float;
// Binding 0 is declared but never referenced in this shader.
layout(std430, binding = 0) buffer data_i32 { int _data_i32_[]; };
// Binding 2 is both read (inputs) and written (result) by the kernel below.
layout(std430, binding = 2) buffer args_i32 { int _args_i32_[]; };

// Neither of these two constants is referenced in this shader.
const float inf = 1.0f / 0.0f;
const float nan = 0.0f / 0.0f;
// Computes the bitwise AND of two int arguments and writes the result back
// into the argument buffer.
void jit_evaluator_20()
{ // serial
  // Operands are read from int indices 0 (0 << 1) and 2 (1 << 1), i.e. the
  // arguments are two ints apart.
  // NOTE(review): presumably each argument occupies an 8-byte slot - confirm.
  int B = _args_i32_[0 << 1];
  int C = _args_i32_[1 << 1];
  int D = B & C;
  // In GLSL `+` binds tighter than `>>`, so the index parses as
  // 320 >> (2 + 0) = 80. That equals (320 >> 2) + 0 only because the added
  // term is 0; with a non-zero term the two readings would differ.
  // NOTE(review): 320 looks like a byte offset being converted to a 4-byte
  // element index - confirm against the code generator.
  _args_i32_[320 >> 2 + 0] = D;
}

// Entry point: runs the kernel body once per invocation.
void main()
{
  jit_evaluator_20();
}

[D 11/17/21 18:54:36.961 12928] [opengl_api.cpp:taichi::lang::opengl::CompiledProgram::add@257] [glsl]
compiling kernel compute_c50_00<<<1536, 128>>>:
#version 430 core
// Compute shader with 128 invocations per work group along x.
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
precision highp float;
// Binding 0 is the only buffer; this kernel both reads and writes it.
layout(std430, binding = 0) buffer data_f32 { float _data_f32_[]; };

// Neither of these two constants is referenced in this shader.
const float inf = 1.0f / 0.0f;
const float nan = 0.0f / 0.0f;
// Iterates over 1048576 work items, each split into a row (item >> 10) and a
// column (item & 1023). For every item whose column is below 768 it sums
// _data_f32_[w * 1024 + a] over w in [0, 1024) and a in [0, 768), then stores
// that sum at _data_f32_[1048576 + row * 1024 + column].
// NOTE(review): the accumulated sum does not depend on the work-item index,
// so every stored element receives the same value and the full double loop
// is repeated per item - confirm this is what the kernel author intended.
void compute_c50_00()
{ // range for
  // range known at compile time
  // Each invocation starts at its global x index and advances by the total
  // number of invocations in the dispatch.
  int _sid0 = int(gl_GlobalInvocationID.x);
  for (int _sid = _sid0; _sid < (1048576); _sid += int(gl_WorkGroupSize.x * gl_NumWorkGroups.x)) {
    int _itv = 0 + _sid;
      // Constants: EL = 10-bit mask, En = 0, U = 1024, T = shift by 10
      // (multiply/divide by 1024), R = 768.
      int EL = int(1023);
      int En = int(0);
      int U = int(1024);
      int T = int(10);
      int R = int(768);
      int D = _itv;
      // E4 = row of this item, Ea = column of this item.
      int E4 = D >> T;
      int Ea = D & EL;
      // O is -1 when the column is below 768, otherwise 0.
      int O = -int(Ea < R);
      if (O != 0) {
        // V is the running sum.
        float V = float(0);
        for (int W_ = En; W_ < U; W_ += 1) {
          int W = W_;
          int X = W;
          // Eb..El: truncating quotient X / 1024, decremented by one when X
          // is negative, non-zero and not an exact multiple of 1024.
          // Over this loop's range (0..1023) X is never negative, so Ek is
          // always 0 and El == Eb == 0.
          int Eb = X / U;
          int Ed = -int(X < En);
          int EW = Eb << T;
          int Eg = -int(Ed != En);
          int Eh = -int(X != En);
          int Ei = -int(EW != X);
          int Ej = Eg & Eh;
          int Ek = Ej & Ei;
          int El = Eb + Ek;
          // Aq = X - El * 1024, the matching remainder (equals X here).
          int Z = El << T;
          int Aq = X - Z;
          // Fu = (Aq & 1023) * 1024: element offset of the row being read.
          int Ft = Aq & EL;
          int Fu = Ft << T;
          for (int Ar_ = En; Ar_ < R; Ar_ += 1) {
            int Ar = Ar_;
            int As = Ar;
            // Em..Ew: same adjusted-quotient pattern as above, dividing by
            // 768. Over this loop's range (0..767) Ew == 0.
            int Em = As / R;
            int Eo = -int(As < En);
            int Eq = Em * R;
            int Er = -int(Eo != En);
            int Es = -int(As != En);
            int Et = -int(Eq != As);
            int Eu = Er & Es;
            int Ev = Eu & Et;
            int Ew = Em + Ev;
            // Av = As - Ew * 768, the matching remainder (equals As here).
            int Au = Ew * R;
            int Av = As - Au;
            // Address arithmetic is done in bytes and converted back to a
            // float index by the final `>> 2`.
            int DH = 0;
            // En is 0, so this term contributes nothing.
            int DJ = DH + 8388608 * En; // S0
            int DK = DJ + 0; // S1
            int EE = Av & EL;
            // Fc = row offset + column = element index being read.
            int Fc = EE + Fu;
            int DO = DK + 4 * Fc; // S1
            int DP = DO + 0; // S2
            float Ax = _data_f32_[DP >> 2];
            // Accumulate: V = V + Ax.
            float Ay = V;
            float Az = Ay + Ax;
            V = Az;
          }
        }
        float AB = V;
        int DT = 0;
        // En is 0, so this term contributes nothing.
        int DV = DT + 8388608 * En; // S0
        // The destination region starts 4194304 bytes (1048576 floats) into
        // the buffer.
        int DW = DV + 4194304; // S3
        // F4 = (row & 1023) * 1024 + column: element index inside the
        // destination region.
        int Fe = E4 & EL;
        int Fs = Fe << T;
        int F4 = Ea + Fs;
        int E0 = DW + 4 * F4; // S3
        int E1 = E0 + 0; // S4
        _data_f32_[E1 >> 2] = AB;
      }
  }
}

// Entry point: runs the kernel body once per invocation.
void main()
{
  compute_c50_00();
}

[T 11/17/21 18:54:36.961 12928] [kernel_impl.py:materialize@459] Compiling kernel tensor_to_ext_arr_c4_0...