Upgrade to llama.cpp git sha 615212 #7

Merged: 26 commits from `llama-cpp-615212` into `main` on Feb 2, 2025

Commits:
- `5588beb` Update to llama.cpp sha 615212 (icppWorld, Jan 23, 2025)
- `3c82921` build-native : files compile (icppWorld, Jan 24, 2025)
- `5cd89f3` format (icppWorld, Jan 24, 2025)
- `5ea6d68` mockic.exe builds !! (icppWorld, Jan 24, 2025)
- `cc8123f` register CPU backend (icppWorld, Jan 25, 2025)
- `47d0a55` Update memory management for model into Orthogonal Persistence (icppWorld, Jan 27, 2025)
- `23bbf40` Format (icppWorld, Jan 27, 2025)
- `7b79e6f` Default behavior: -no-cnv (icppWorld, Jan 27, 2025)
- `528b5ed` Upgrade to icpp-pro 5.0.2 (icppWorld, Jan 27, 2025)
- `2be9f53` wasm now builds (icppWorld, Jan 28, 2025)
- `d5a48a6` free model only once (icppWorld, Jan 28, 2025)
- `063b605` Scripts to build & deploy (icppWorld, Jan 28, 2025)
- `a663315` tinystories is working in canister! (icppWorld, Jan 28, 2025)
- `cc5a326` Some small updates (icppWorld, Jan 29, 2025)
- `ba58d3b` Do not call load_model from upload.py (icppWorld, Jan 29, 2025)
- `b7bd4cb` Update comment (icppWorld, Jan 29, 2025)
- `bf6141d` For clarity, dfx.json uses the .did file in 'build' folder (icppWorld, Jan 31, 2025)
- `7ff8fa6` remove_log_file (icppWorld, Feb 1, 2025)
- `9c0fc1f` Update native & pytests (icppWorld, Feb 1, 2025)
- `4f98e55` CI/CD - use different branch while working on upgrade (icppWorld, Feb 1, 2025)
- `0280511` format include (icppWorld, Feb 1, 2025)
- `30a6b8c` Update READMEs (icppWorld, Feb 2, 2025)
- `81e00f0` Update table of llama.cpp upgrades (icppWorld, Feb 2, 2025)
- `5228f76` Running LLMs on-chain solves your cybersecurity problem (icppWorld, Feb 2, 2025)
- `121e274` Merge remote-tracking branch 'origin/main' into llama-cpp-615212 (icppWorld, Feb 2, 2025)
- `3ff73fa` Upgrade to llama.cpp sha 615212 (icppWorld, Feb 2, 2025)
.github/workflows/cicd-mac.yml (1 addition, 0 deletions):

```
@@ -40,6 +40,7 @@ jobs:
       with:
         repository: onicai/llama_cpp_onicai_fork
         ref: onicai # Specify the branch name here
+        # ref: onicai-615212 # While working on the upgrade...
         path: src/llama_cpp_onicai_fork
         fetch-depth: 1 # Get just the last commit
         submodules: 'recursive'
```
.gitignore (1 addition, 1 deletion):

```
@@ -1,5 +1,5 @@
 # Misc
-llama_cpp_onicai_fork
+llama_cpp_onicai_fork*
 *.code-workspace
 x
 y
```
README-0001-b841d0.md (new file, 110 additions):
# DETAILS FOR UPGRADE from llama.cpp sha `615212` to `b841d0`

### cpp_paths

#### main_.cpp
`meld main_.cpp llama_cpp_onicai_fork/examples/main/main.cpp`
- use `main_` instead of `main`
- A few items related to the console & Ctrl+C handling need to be commented out


#### llama_cpp_onicai_fork/src/llama.cpp
- add `#include "ic_api.h"`
- replace `throw std::runtime_error(format` with `IC_API::trap(std::string("RUNTIME ERROR: ") + format` (see the sketch after this list)
- replace `throw` with `IC_API::trap`
- comment out `try - catch` blocks. The program will abort if an exception is thrown.
- comment out threading-related items:
  - `#include <future>`
  - `#include <mutex>`
  - `#include <thread>`
- comment out these functions completely:
  - `llama_tensor_quantize_internal`
  - `llama_model_quantize_internal`
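
To illustrate the `throw` to `IC_API::trap` rewrite, here is a minimal sketch with a hypothetical check (`check_n_ctx` and its message are made up for illustration; `IC_API::trap` comes from icpp-pro's `ic_api.h` and aborts the current canister call):

```
#include <string>

#include "ic_api.h" // icpp-pro: IC_API::trap(std::string) never returns

// Hypothetical helper showing the rewrite of a single throw site:
static void check_n_ctx(int n_ctx) {
    if (n_ctx <= 0) {
        // upstream: throw std::runtime_error(format("invalid n_ctx: %d", n_ctx));
        // ICPP-PATCH: a canister cannot unwind C++ exceptions, so trap instead
        IC_API::trap(std::string("RUNTIME ERROR: ") + "invalid n_ctx: " +
                     std::to_string(n_ctx));
    }
}
```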


#### llama_cpp_onicai_fork/src/llama-vocab.cpp
- add `#include "ic_api.h"`
- replace `throw std::runtime_error(format` with `IC_API::trap(std::string("RUNTIME ERROR: ") + format`
- comment out `try - catch` blocks. The program will abort if an exception is thrown.
- add a check on `llama_token_bos(model)`; otherwise the llama2.c models never stop generating:
```
bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token) {
    return token != -1 && (
        token == llama_token_eos_impl(vocab) ||
        token == llama_token_eot_impl(vocab) ||
        token == llama_token_bos_impl(vocab) // ICPP-PATCH: the llama2.c model predicts bos without first predicting an eos
    );
}
```

#### llama_cpp_onicai_fork/src/llama-grammar.cpp
No changes needed

#### llama_cpp_onicai_fork/src/llama-sampling.cpp
No changes needed

#### llama_cpp_onicai_fork/src/unicode-data.cpp
- no modifications needed for the IC

#### llama_cpp_onicai_fork/src/unicode.cpp
- add `#include "ic_api.h"`
- replace `throw` with `IC_API::trap`

#### llama_cpp_onicai_fork/common/json-schema-to-grammar.cpp
- add `#include "ic_api.h"`
- replace `throw` with `IC_API::trap`
- comment out `try - catch` blocks. The program will abort if an exception is thrown (see the sketch below).
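
A sketch of what the `try - catch` removal looks like (a hypothetical parse helper, not the literal code in the fork; `json.hpp` is the nlohmann JSON header bundled with llama.cpp's common code):

```
#include <string>

#include "json.hpp" // nlohmann JSON, bundled with llama.cpp's common code

// Hypothetical helper showing the pattern: the guard is commented out,
// and the body now runs unguarded.
static nlohmann::ordered_json parse_schema(const std::string & input) {
    // try { // ICPP-PATCH: commented out
    return nlohmann::ordered_json::parse(input); // a throw here aborts the call
    // } catch (const std::exception & e) { // ICPP-PATCH: commented out
    //     fprintf(stderr, "parse error: %s\n", e.what());
    //     return nlohmann::ordered_json();
    // }
}
```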


#### llama_cpp_onicai_fork/common/build-info.cpp
- run this command to create it:
```
make build-info-cpp-wasm
```
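
The generated `build-info.cpp` only defines build metadata symbols; it looks roughly like this (the values shown are placeholders that the make target fills in):

```
int LLAMA_BUILD_NUMBER = 0;
char const *LLAMA_COMMIT = "615212";
char const *LLAMA_COMPILER = "clang ...";
char const *LLAMA_BUILD_TARGET = "wasm32";
```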

#### llama_cpp_onicai_fork/common/grammar-parser.cpp
- add `#include "ic_api.h"`
- replace `throw` with `IC_API::trap`
- comment out `try - catch` blocks. The program will abort if an exception is thrown.

#### llama_cpp_onicai_fork/common/sampling.cpp
- add `#include "ic_api.h"`
- replace `throw` with `IC_API::trap`

#### llama_cpp_onicai_fork/common/common.cpp
- add `#include "ic_api.h"`
- replace `throw` with `IC_API::trap`
- comment out all code related to `<pthread.h>`
- comment out `try - catch` blocks. The program will abort if an exception is thrown.
- comment out calls to `std::getenv` (see the sketch below)
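
One way a `std::getenv` call can be neutralized (a sketch of the pattern; the actual variable names in `common.cpp` differ):

```
// ICPP-PATCH: there are no environment variables inside a canister.
// const char * cache_dir = std::getenv("LLAMA_CACHE");
const char * cache_dir = nullptr; // behave as if the variable is unset
```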


---
### c_paths

#### llama_cpp_onicai_fork/ggml/src/ggml.c
- comment out all code related to signals:
  - `#include <signal.h>`
- comment out the many threading-related sections (see the sketch below)
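
The signal and threading removals follow this pattern (a sketch, not the literal diff):

```
// ICPP-PATCH: a canister runs single-threaded and has no signals.
// #include <signal.h>
// #include <pthread.h>
//
// upstream worker-thread spawning, commented out:
//     pthread_create(&workers[i], NULL, ggml_graph_compute_thread, &state);
// the compute graph now runs entirely on the single main thread
```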

#### llama_cpp_onicai_fork/ggml/src/ggml-alloc.c
No updates needed for icpp-pro

#### llama_cpp_onicai_fork/ggml/src/ggml-backend.c
No updates needed for icpp-pro

#### llama_cpp_onicai_fork/ggml/src/ggml-quants.c
No updates needed for icpp-pro

#### llama_cpp_onicai_fork/ggml/src/ggml-aarch64.c
No updates needed for icpp-pro

---
### headers to modify

#### llama_cpp_onicai_fork/common/log.h
- comment out `#include <thread>`
- comment out some other threading code

#### llama_cpp_onicai_fork/common/common.h
- comment out `#include <thread>`