Merge branch 'main' into sync-llama-cpp
jhen0409 committed Jul 27, 2024
2 parents 96e8e9a + 7972f83 commit 246e207
Showing 5 changed files with 31 additions and 12 deletions.
android/src/main/CMakeLists.txt (6 changes: 4 additions & 2 deletions)
@@ -47,15 +47,17 @@ function(build_library target_name cpu_flags)
target_compile_options(${target_name} PRIVATE -DRNLLAMA_ANDROID_ENABLE_LOGGING)
endif ()

- if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
+ # NOTE: If you want to debug the native code, you can uncomment if and endif
+ # Note that it will be extremely slow
+ # if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
target_compile_options(${target_name} PRIVATE -O3 -DNDEBUG)
target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections)

target_link_options(${target_name} PRIVATE -Wl,--gc-sections)
target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL)
target_link_options(${target_name} PRIVATE -flto)
- endif ()
+ # endif ()
endfunction()

# Default target (no specific CPU features)
android/src/main/java/com/rnllama/LlamaContext.java (16 changes: 11 additions & 5 deletions)
@@ -137,7 +137,7 @@ public WritableMap completion(ReadableMap params) {
}
}

- return doCompletion(
+ WritableMap result = doCompletion(
this.context,
// String prompt,
params.getString("prompt"),
@@ -191,6 +191,10 @@ public WritableMap completion(ReadableMap params) {
params.hasKey("emit_partial_completion") ? params.getBoolean("emit_partial_completion") : false
)
);
+ if (result.hasKey("error")) {
+   throw new IllegalStateException(result.getString("error"));
+ }
+ return result;
}

public void stopCompletion() {
@@ -215,12 +219,14 @@ public String detokenize(ReadableArray tokens) {
return detokenize(this.context, toks);
}

- public WritableMap embedding(String text) {
+ public WritableMap getEmbedding(String text) {
if (isEmbeddingEnabled(this.context) == false) {
throw new IllegalStateException("Embedding is not enabled");
}
- WritableMap result = Arguments.createMap();
- result.putArray("embedding", embedding(this.context, text));
+ WritableMap result = embedding(this.context, text);
+ if (result.hasKey("error")) {
+   throw new IllegalStateException(result.getString("error"));
+ }
return result;
}

@@ -351,7 +357,7 @@ protected static native WritableMap doCompletion(
protected static native WritableArray tokenize(long contextPtr, String text);
protected static native String detokenize(long contextPtr, int[] tokens);
protected static native boolean isEmbeddingEnabled(long contextPtr);
- protected static native WritableArray embedding(long contextPtr, String text);
+ protected static native WritableMap embedding(long contextPtr, String text);
protected static native String bench(long contextPtr, int pp, int tg, int pl, int nr);
protected static native void freeContext(long contextPtr);
}
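
Net effect of the LlamaContext.java changes: completion() and getEmbedding() now inspect the map returned from the native layer and throw an IllegalStateException when it carries an "error" key, instead of handing a silently broken result up to JavaScript. A minimal caller sketch (hypothetical code, not part of this commit; it only assumes the LlamaContext API shown above and React Native's bridge Promise):

import com.facebook.react.bridge.Promise;
import com.facebook.react.bridge.WritableMap;

// Hypothetical helper mirroring the AsyncTask pattern RNLlama.java uses.
void embedText(LlamaContext context, String text, Promise promise) {
  try {
    // Throws if embeddings are disabled, sampling fails to initialize,
    // or the native result map contains an "error" entry.
    WritableMap result = context.getEmbedding(text);
    promise.resolve(result); // result.getArray("embedding") holds the vector
  } catch (IllegalStateException e) {
    promise.reject("EMBEDDING_FAILED", e.getMessage());
  }
}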
android/src/main/java/com/rnllama/RNLlama.java (2 changes: 1 addition & 1 deletion)
@@ -297,7 +297,7 @@ protected WritableMap doInBackground(Void... voids) {
if (context == null) {
throw new Exception("Context not found");
}
- return context.embedding(text);
+ return context.getEmbedding(text);
} catch (Exception e) {
exception = e;
}
android/src/main/jni.cpp (13 changes: 10 additions & 3 deletions)
@@ -577,17 +577,24 @@ Java_com_rnllama_LlamaContext_embedding(
llama->params.prompt = text_chars;

llama->params.n_predict = 0;

+ auto result = createWriteableMap(env);
+ if (!llama->initSampling()) {
+   putString(env, result, "error", "Failed to initialize sampling");
+   return reinterpret_cast<jobject>(result);
+ }

llama->beginCompletion();
llama->loadPrompt();
llama->doCompletion();

std::vector<float> embedding = llama->getEmbedding();

- jobject result = createWritableArray(env);

+ auto embeddings = createWritableArray(env);
for (const auto &val : embedding) {
- pushDouble(env, result, (double) val);
+ pushDouble(env, embeddings, (double) val);
}
+ putArray(env, result, "embedding", embeddings);

env->ReleaseStringUTFChars(text, text_chars);
return result;
ios/RNLlamaContext.mm (6 changes: 5 additions & 1 deletion)
@@ -364,8 +364,12 @@ - (NSArray *)embedding:(NSString *)text {
llama->params.prompt = [text UTF8String];

llama->params.n_predict = 0;
- llama->loadPrompt();

+ if (!llama->initSampling()) {
+   @throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to initialize sampling" userInfo:nil];
+ }
+ llama->beginCompletion();
+ llama->loadPrompt();
llama->doCompletion();

std::vector<float> result = llama->getEmbedding();
