
[Graph] windows build (#312)
* fix win build error
* add win header
* modify MD
* clang-format 14
luoyu-intel authored Sep 14, 2023
1 parent c76c7e4 commit bffa1b0
Showing 4 changed files with 14 additions and 5 deletions.
8 changes: 8 additions & 0 deletions intel_extension_for_transformers/llm/runtime/graph/README.md
@@ -37,12 +37,20 @@ We support the following models:
## How to use

### 1. Build LLM Runtime
Linux
```shell
mkdir build
cd build
cmake .. -G Ninja
ninja
```
Windows: install Visual Studio 2022 (a validated version), search for 'Developer PowerShell for VS 2022' and open it, then run the following commands.
```powershell
mkdir build
cd build
cmake ..
cmake --build . -j
```

### 2. Convert LLM
LLM Runtime assumes the same model format as [llama.cpp](https://github.com/ggerganov/llama.cpp) and [ggml](https://github.com/ggerganov/ggml). You can also convert the model by following the steps below:
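The actual conversion steps are truncated in this diff view. As a rough, llama.cpp-style illustration only (the script name, flags, and file names below are assumptions, not taken from this repository):
```shell
# hypothetical sketch: convert a Hugging Face checkpoint to the ggml-style
# binary format the runtime loads, then quantize it for inference
python scripts/convert_model.py --outtype f32 --outfile ne-f32.bin /path/to/model
./build/bin/quantize ne-f32.bin ne-q4_0.bin q4_0
```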
@@ -25,6 +25,7 @@
#include <random>
#include <regex>
#include <thread>
#include <functional>

#include "core/data_types.h"
#include "core/ne_layers.h"
@@ -1759,7 +1759,7 @@ void jblas_fusion_attn_fp32_fp16_fp16_fp32_forward(const attn_fp32_fp16_fp16_fp3
// return jblas_fusion_attn_forward_ref(*reinterpret_cast<const attn_fwd_args_t<float, fp16, fp16, float>*>(params));
}

bool blas_fusion_attn_fp16_support(const attn_shape_t* params) {
bool jblas_fusion_attn_fp16_support(const attn_shape_t* params) {
#if CompileFP16()
GetCPUDevice();
// TODO check K V's layout
@@ -144,10 +144,10 @@ void CHATGLM2::load(model_context& lctx, model_progress_callback progress_callba
layer.attn[2] = ml->get_tensor(layers_i + ".self_attention.dense.weight", {n_embd, n_embd}, backend);

// ffn GEMM
layer.ffn[0] =
ml->get_tensor(layers_i + ".mlp.dense_h_to_4h.weight", {n_embd, model.hparams.ffn_hidden_size * 2}, backend);
layer.ffn[1] =
ml->get_tensor(layers_i + ".mlp.dense_4h_to_h.weight", {model.hparams.ffn_hidden_size, n_embd}, backend);
layer.ffn[0] = ml->get_tensor(layers_i + ".mlp.dense_h_to_4h.weight",
{n_embd, uint32_t(model.hparams.ffn_hidden_size * 2)}, backend);
layer.ffn[1] = ml->get_tensor(layers_i + ".mlp.dense_4h_to_h.weight",
{uint32_t(model.hparams.ffn_hidden_size), n_embd}, backend);

layer.k_cache = d_ne_new_tensor_3d(model.ctx, NE_TYPE_F16, 4096 / 32, 32768, 2);
layer.v_cache = d_ne_new_tensor_3d(model.ctx, NE_TYPE_F16, 32768, 4096 / 32, 2);
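The only change in this hunk is wrapping the dimensions in explicit `uint32_t(...)` casts (plus reflowing from clang-format 14). The likely reason is that MSVC rejects the implicit narrowing of a wider or signed integer expression inside a braced initializer list. A minimal illustration under that assumption (types, names, and values here are hypothetical, not from this file):
```cpp
#include <cstdint>
#include <initializer_list>

// stand-in for a get_tensor-style call that takes a list of uint32_t dimensions
static void take_dims(std::initializer_list<uint32_t>) {}

int main() {
  int64_t ffn_hidden_size = 13696;  // hypothetical width held in a wider type
  // take_dims({ffn_hidden_size * 2});        // narrowing conversion: rejected by MSVC
  take_dims({uint32_t(ffn_hidden_size * 2)});  // explicit cast builds on every toolchain
  return 0;
}
```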
