Running in .NET within Unity crashes #142
I found a solution via trial and error.
Hi @nicolastamm, sorry for the late response. I hope it helps if I respond at least now 😄 After spending half a year with BlingFire though, my humble suggestion would be not to use it at all 👎 If it helps, here are some code snippets; ping me if you need more:

**BlingTokenizer.cs**, the main script which handles loading:

```csharp
using System;
using System.IO;
using System.Linq;
using BlingFire;
using UnityEngine;

public class BlingTokenizer : Singleton<BlingTokenizer> {
    ulong tokenizerHandle = 0;
    ulong tokenizerI2WHandle = 0;
    bool isLoaded = false;

    public static ulong GetTokenizerHandle() {
        return Instance.tokenizerHandle;
    }

    public static ulong GetTokenizerI2Handle() {
        return Instance.tokenizerI2WHandle;
    }

    public static void Load() {
        if (Instance.isLoaded) return;
        string tokenizerModelPath = Instance.GetPath("gpt2.bin");
        string tokenizerI2WPath = Instance.GetPath("gpt2.i2w");
        if (File.Exists(tokenizerModelPath)) {
            try {
                Instance.tokenizerHandle = BlingFireUtils.LoadModel(tokenizerModelPath);
                Debug.Log("Path Found and loaded: " + tokenizerModelPath);
            } catch (Exception e) {
                Console.WriteLine("{0} Exception caught.", e);
                Debug.Log("Exception: " + tokenizerModelPath);
            }
        }
        if (File.Exists(tokenizerI2WPath)) {
            // This crashes the editor
            try {
                Instance.tokenizerI2WHandle = BlingFireUtils.LoadModel(tokenizerI2WPath);
                Debug.Log("Path Found and loaded: " + tokenizerI2WPath);
            } catch (Exception e) {
                Console.WriteLine("{0} Exception caught.", e);
                Debug.Log("Exception: " + tokenizerI2WPath);
            }
        }
        //BlingFireUtils.SetNoDummyPrefix(GetTokenizerHandle(), true);
        //BlingFireUtils.SetNoDummyPrefix(GetTokenizerI2Handle(), true);
        Instance.isLoaded = true;
    }

    private void OnDestroy() {
        // Note: only the main tokenizer model is freed here; tokenizerI2WHandle is never released.
        BlingFireUtils.FreeModel(tokenizerHandle);
    }

    public static int[] Tokenize(string input_str) {
        byte[] inBytes = System.Text.Encoding.UTF8.GetBytes(input_str);
        int[] ids = new int[128];
        // Note: the max-ids argument of 8 caps the output at 8 tokens, even though the buffer holds 128.
        int outputCount = BlingFireUtils.TextToIds(GetTokenizerHandle(), inBytes, inBytes.Length, ids, 8, -100);
        ids = ids.Take(outputCount).ToArray();
        return ids;
    }

    String GetPath(string fileName) {
        return Path.GetFullPath(Path.Combine(new string[] { Application.dataPath, "Models", fileName }));
    }
}
```

**LanguageModel.cs**, where I run the Load() function. Sorry for the mess, no time to make it tasty for open source consumption 😆

```csharp
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using BlingFire;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using UnityEngine;

public class LanguageModel : MonoBehaviour {
    public const int VOCAB_SIZE = 50257;
    String modelFilePath = "Models/gpt-neo-125M.onnx";
    OrtEnv ortEnv = OrtEnv.Instance();
    InferenceSession ortSession;

    private void Awake() {
        ortEnv.DisableTelemetryEvents();
        LoadModel();
    }

    void Start() {
        //LoadTokenizer();
    }

    void Update() {
        if (Input.GetKeyDown(KeyCode.A)) {
            LoadTokenizer();
        }
        if (Input.GetKeyDown(KeyCode.B)) {
            //LoadModel();
        }
        if (Input.GetKeyDown(KeyCode.LeftControl)) {
            LoadTokenizer();
            var startTime = System.DateTime.Now;
            string testText = "I am a man and I";
            int[] inputIds = BlingTokenizer.Tokenize(testText);
            var preProcTime = System.DateTime.Now;
            Tensor<float> output_ids = Predict(inputIds);
            var gptNeoRunTime = System.DateTime.Now;
            List<List<LMPrediction>> preds = PostProcess(output_ids, 4, inputIds.Length);
            var endTime = System.DateTime.Now;
            Debug.Log("Gpt Neo Runtime: " + (gptNeoRunTime - startTime).TotalMilliseconds);
            Debug.Log("LM PreProc Time: " + (preProcTime - startTime).TotalMilliseconds + "ms; " + "PostProcess runtime: " + (endTime - gptNeoRunTime).TotalMilliseconds + "ms");
            PrintPreds(preds);
            Debug.Log(NaivePredsToText(preds));
        }
    }

    void LoadTokenizer() {
        BlingTokenizer.Load();
    }

    void LoadModel() {
        String modelPath = GetPath();
        ortSession = new InferenceSession(modelPath);
    }

    Tensor<float> Predict(int[] input_ids) {
        Debug.Log(input_ids.Length);
        // Fixed 8-token input window, padded with -100 (matches the tokenizer's cap above).
        Tensor<Int64> input_tensor = new DenseTensor<Int64>(new[] { 1, 1, 8 });
        for (int i = 0; i < 8; i++) {
            input_tensor[0, 0, i] = i < input_ids.Length ? input_ids[i] : -100;
        }
        var model_inputs = new List<NamedOnnxValue>() {
            NamedOnnxValue.CreateFromTensor("input_ids", input_tensor)
        };
        var model_outputs = ortSession.Run(model_inputs);
        var token_activation_output = model_outputs.First((v) => v.Name == "output_0").AsTensor<float>();
        //var token_activation_output12 = model_outputs.First((v) => v.Name == "output_12").AsTensor<float>();
        Debug.Log($"Got an output tensor [{String.Join(",", token_activation_output.Dimensions.ToArray())}]");
        return token_activation_output;
    }

    List<LMPrediction> CreatePredictions(Tensor<float> token_activation_output, int top_k = -1, int skip = 0) {
        // Softmax over one position's logits, then sort by confidence.
        var logits = token_activation_output.AsEnumerable<float>().Skip(skip * VOCAB_SIZE).Take(VOCAB_SIZE);
        float sum = logits.Sum(x => (float)Math.Exp(x));
        IEnumerable<float> softmax = logits.Select(x => (float)Math.Exp(x) / sum);
        var test_sorted_predictions = softmax.Select((x, i) => new LMPrediction() { token = i, confidence = x }).OrderByDescending(x => x.confidence).Take(top_k > 0 ? top_k : VOCAB_SIZE);
        return test_sorted_predictions.ToList();
    }

    public List<List<LMPrediction>> PostProcess(Tensor<float> token_activation_output, int top_k = -1, int inputSize = 1, int genLength = 8) {
        List<List<LMPrediction>> output_predictions = new List<List<LMPrediction>>();
        for (int i = 0; i < genLength - inputSize; i++) {
            output_predictions.Add(CreatePredictions(token_activation_output, top_k, inputSize + i));
        }
        return output_predictions;
    }

    string NaivePredsToText(List<List<LMPrediction>> preds) {
        string output = "";
        foreach (List<LMPrediction> predList in preds) {
            output += predList.FirstOrDefault().ToWord() + " ";
        }
        return output;
    }

    void PrintPreds(List<List<LMPrediction>> preds) {
        string printVal = "";
        foreach (List<LMPrediction> predList in preds) {
            printVal += " \n---------------\n" + String.Join("\n", predList.Select(x => x.ToConfidenceStr()));
        }
        Debug.Log(printVal);
    }

    String GetPath() {
        return Path.Combine(new string[] { Application.dataPath, modelFilePath });
    }
}
```

But yeah, I'm not using "BlankFire" anymore 🐶
Hey! Thanks for the code snippets and the extensive response. Unfortunately, after trying both your approach and much more trial and error, I still keep getting the same error... LoadModel is definitely trying to access something weird, since if I attach to the process and debug from Visual Studio it sometimes works. It seems really random. Sometimes it's the .i2w file, sometimes the .bin file, or both! I was mostly interested in their performance claims, since I believe a huge problem for text-generating models deployed in Unity is how slow the inference runs are... So what other services have you tried to forgo BlingFire altogether? Thanks again for the help!
Hi, @nicolastamm! My initial idea, which did not crash, was to load the model on one button click, then wait a bit and load the tokenizer on another. Just bind the functions to different keys and experiment with that. Also, have you tried different Unity versions? For me it was 2021.2.7f1, which ran most consistently. Well, I can't complain about the speed. When it worked, it worked fast; the problems, however, are obviously consistency and adaptability, as you've experienced too, I see 😄 I did not use other services per se. I went back to a service-based approach.
It was my initial attempt to do things last year and it worked quite well. Nevertheless, running things natively with less intermediary code would be faster, so I decided to try libraries. None worked initially, and some were too limited (e.g. NatML). I tried Barracuda, Microsoft.ML.NET, and Microsoft.ML.OnnxRuntime, which finally ran things with BlingFire's help. Anyway, when running inference the local-service way, I managed to dish out 30+ FPS with 128 x 128 images in segmentation tasks. There's a throttling problem, but you can code around that. My only suggestion would be not to use raw libraries or frameworks made for experimentation, but instead try ONNX Runtime; it proved to be 2-3 times faster in some cases. A headache might be exporting the model to the ONNX format, but many models already have one out there, e.g. most 🤗 transformers-hosted models do, and let's be honest, that's where most language models live nowadays! On the other hand, Coqui-TTS doesn't, so you'd have to DIY through it.
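For illustration, here's a minimal sketch of what such a local inference service could look like in plain .NET with ONNX Runtime, running outside Unity. The port, route, model file name, input name ("input_ids"), and the comma-separated wire format are all assumptions for the example, not details from anyone's actual setup:

```csharp
// Hypothetical local inference service: a plain .NET console app wrapping an
// InferenceSession behind a bare HttpListener. All endpoint names are assumed.
using System;
using System.IO;
using System.Linq;
using System.Net;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

class InferenceService {
    static void Main() {
        // Load the model once, in a long-lived process, away from the Unity editor.
        using var session = new InferenceSession("gpt-neo-125M.onnx");
        var listener = new HttpListener();
        listener.Prefixes.Add("http://localhost:8080/predict/");
        listener.Start();
        Console.WriteLine("Listening on http://localhost:8080/predict/");
        while (true) {
            HttpListenerContext ctx = listener.GetContext(); // one request at a time
            // Request body: comma-separated token ids, e.g. "40,716,257"
            string body = new StreamReader(ctx.Request.InputStream).ReadToEnd();
            long[] ids = body.Split(',').Select(long.Parse).ToArray();
            var tensor = new DenseTensor<long>(ids, new[] { 1, ids.Length });
            var inputs = new[] { NamedOnnxValue.CreateFromTensor("input_ids", tensor) };
            using var results = session.Run(inputs);
            float[] logits = results.First().AsEnumerable<float>().ToArray();
            // Reply with the logits as comma-separated plain text (naive, but simple).
            byte[] reply = System.Text.Encoding.UTF8.GetBytes(string.Join(",", logits));
            ctx.Response.OutputStream.Write(reply, 0, reply.Length);
            ctx.Response.Close();
        }
    }
}
```

The point of the long-lived process is that the model loads once and Unity never touches the native libraries that crash the editor.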
I am currently doing something similar. I plan to build a .NET Core app running onnxruntime + blingfire and then a Unity client that sends an HTTP request. The reason for the extra hoop is a concern for compatibility and computing power: the project's extra step is to run this in VR, ideally on standalone headsets. I will, however, explore the option of running everything natively after I've dealt with the deadlines for this project. Thanks again for your erudite responses! Much appreciated
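On the Unity side, a rough sketch of the HTTP client half of that plan might look like this, assuming the hypothetical local service and comma-separated wire format sketched above:

```csharp
// Unity-side client sketch for the hypothetical local service above.
using System.Collections;
using UnityEngine;
using UnityEngine.Networking;

public class RemoteLMClient : MonoBehaviour {
    const string ServiceUrl = "http://localhost:8080/predict/"; // assumed endpoint

    public void RequestPrediction(int[] tokenIds) {
        StartCoroutine(Predict(tokenIds));
    }

    IEnumerator Predict(int[] tokenIds) {
        byte[] body = System.Text.Encoding.UTF8.GetBytes(string.Join(",", tokenIds));
        using (var req = new UnityWebRequest(ServiceUrl, "POST")) {
            req.uploadHandler = new UploadHandlerRaw(body);
            req.downloadHandler = new DownloadHandlerBuffer();
            req.SetRequestHeader("Content-Type", "text/plain");
            yield return req.SendWebRequest();
            if (req.result != UnityWebRequest.Result.Success) {
                Debug.LogError("Inference request failed: " + req.error);
            } else {
                // The service replies with comma-separated logits as plain text.
                Debug.Log("Received " + req.downloadHandler.text.Split(',').Length + " logits");
            }
        }
    }
}
```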
If anyone is interested in using this in Unity: it does not work... A single line of code, LoadModel, crashes the editor.
Not much useful info either, as the error itself is very non-verbose.
Update: code clarification (File.Exists check).