Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Voice commands using phrase embeddings #40

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions commands.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
[
{
"name": "restart",
"threshold": 0.5,
"phrases": [
{
"content": "Start over",
"embedding": [],
"similarity": 0
},
{
"content": "Restart the conversation",
"embedding": [],
"similarity": 0
},
{
"content": "From the top",
"embedding": [],
"similarity": 0
},
{
"content": "Wait, stop, go back",
"embedding": [],
"similarity": 0
},
{
"content": "No no no",
"embedding": [],
"similarity": 0
},
{
"content": "From the beginning",
"embedding": [],
"similarity": 0
},
{
"content": "Take it from the top",
"embedding": [],
"similarity": 0
}
]
}
]
31 changes: 31 additions & 0 deletions embed.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import fs from 'fs';
import util from 'util';
import { CommandEmbedding } from './src/talk';
import { llamaEmbed } from './src/depedenciesLibrary/llm';

const readFileAsync = util.promisify(fs.readFile);
const writeFileAsync = util.promisify(fs.writeFile);

const COMMAND_DEF_PATH = 'commands.json';
const EMBEDDINGS_PATH = 'embeddings.json';
const llamaServerUrl = 'http://127.0.0.1:8080';

// Read the command definitions from COMMAND_DEF_PATH, ask the llama server
// for an embedding of every phrase, and persist the enriched commands to
// EMBEDDINGS_PATH.
const embedCommands = async (): Promise<void> => {
  const commandData = await readFileAsync(COMMAND_DEF_PATH, 'utf8');
  const commands: CommandEmbedding[] = JSON.parse(commandData);
  // Each phrase is embedded independently, so issue all requests in
  // parallel instead of awaiting them one at a time in nested loops.
  await Promise.all(
    commands.flatMap((command) =>
      command.phrases.map(async (phrase) => {
        phrase.embedding = await llamaEmbed(llamaServerUrl, phrase.content);
      })
    )
  );
  const jsonStr = JSON.stringify(commands);

  try {
    await writeFileAsync(EMBEDDINGS_PATH, jsonStr, 'utf8');
    // Log success only when the write actually succeeded; previously this
    // message was printed even after a write error.
    console.log(`Embedding data written to ${EMBEDDINGS_PATH} successfully.`);
  } catch (err) {
    console.error('Error writing JSON to file:', err);
  }
}

// Surface read/parse/embedding failures instead of dying with an
// unhandled promise rejection.
embedCommands().catch((err) => {
  console.error('Failed to embed commands:', err);
  process.exitCode = 1;
});
1 change: 1 addition & 0 deletions embeddings.json

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ import { spawn } from 'child_process';
import readline from 'readline';
import config from './config.json';
const { whisperModelPath, audioListenerScript } = config;
import { talk } from './src/talk';
import embeddings from './embeddings.json';
import { talk, checkTranscriptionForCommand, CommandType, CommandEmbedding } from './src/talk';
const fs = require('fs');
const path = require('path');

Expand Down Expand Up @@ -189,12 +190,15 @@ const transcriptionEventHandler = async (event: AudioBytesEvent) => {

// TODO: Wait for 1s, because whisper bindings currently throw out if not enough audio passed in
// Therefore fix whisper
let transcription = '';
if (!transcriptionMutex && joinedBuffer.length > ONE_SECOND) {
transcriptionMutex = true;
globalWhisperPromise = whisper.whisperInferenceOnBytes(joinedBuffer);
const rawTranscription = await globalWhisperPromise;
// Remove transcription artifacts like (wind howling)
const transcription = rawTranscription.replace(/\s*\[[^\]]*\]\s*|\s*\([^)]*\)\s*/g, '');
transcription = rawTranscription.replace(/\s*\[[^\]]*\]\s*|\s*\([^)]*\)\s*/g, '');
// Trim starting whitespace
transcription = transcription.trimStart();
const transcriptionEvent: TranscriptionEvent = {
timestamp: Number(Date.now()),
eventType: 'transcription',
Expand All @@ -207,6 +211,11 @@ const transcriptionEventHandler = async (event: AudioBytesEvent) => {
newEventHandler(transcriptionEvent);
transcriptionMutex = false;
}
const command: CommandType = await checkTranscriptionForCommand(llamaServerUrl, embeddings as CommandEmbedding[], transcription);
if (command === 'restart') {
console.log('===== RESTART =====');
// TODO restart the conversation
}
}

const cutTranscriptionEventHandler = async (event: TranscriptionEvent) => {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"license": "MIT",
"scripts": {
"start": "npx ts-node ./index.ts",
"embed": "npx ts-node ./embed.ts",
"test-voice": "npx ts-node ./tests/voice.test.ts"
},
"dependencies": {
Expand Down
11 changes: 11 additions & 0 deletions src/depedenciesLibrary/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,14 @@ export const llamaInvoke = (prompt: string, input: string, llamaServerUrl: strin
});
});
}

// Request an embedding vector for `content` from the llama server's
// /embedding endpoint and return it.
export const llamaEmbed = async (llamaServerUrl: string, content: string): Promise<number[]> => {
  const { data } = await axios.post(`${llamaServerUrl}/embedding`, { content });
  return data.embedding;
}
62 changes: 60 additions & 2 deletions src/talk.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
import { playAudioFile, generateAudio } from './depedenciesLibrary/voice'
import { llamaInvoke } from './depedenciesLibrary/llm';
import { llamaInvoke, llamaEmbed } from './depedenciesLibrary/llm';

// The set of voice commands the assistant recognizes; 'continue' means
// "no command matched, keep the conversation going".
export type CommandType = 'continue' | 'restart';

// One example utterance for a command, with its precomputed embedding and
// the similarity score assigned when matched against a transcription.
interface Phrase {
  content: string;
  embedding: number[];
  similarity: number;
}

// A command plus its trigger phrases; a transcription triggers the command
// when its best phrase similarity exceeds `threshold`.
export interface CommandEmbedding {
  name: CommandType;
  threshold: number;
  phrases: Phrase[];
}

// Talk: Greedily generate audio while completing an LLM inference
export const talk = async (prompt: string, input: string, llamaServerUrl: string, personaConfig:string, sentenceCallback: (sentence: string) => void): Promise<string> => {
Expand Down Expand Up @@ -28,4 +42,48 @@ export const talk = async (prompt: string, input: string, llamaServerUrl: string
await promisesChain;
return response;

}
}

const cosineSimilarity = (A: number[], B: number[]): number => {
if ((!A.length) || (!B.length) || (A.length !== B.length)) {
throw new Error('Invalid vectors');
}
let dotProduct = 0;
let magA = 0;
let magB = 0;

for (let i=0; i<A.length; i++) {
dotProduct += A[i] * B[i];
magA += A[i] * A[i];
magB += B[i] * B[i];
}
magA = Math.sqrt(magA);
magB = Math.sqrt(magB);

return dotProduct / (magA * magB);
Comment on lines +47 to +63
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh what? cosine similarity is just dot product operation? :^)

}

// Check the transcription for a match to the command embeddings.
// Embeds the transcription once, then returns the first command whose
// best phrase similarity exceeds that command's threshold; returns
// 'continue' when nothing matches or the transcription is empty.
export const checkTranscriptionForCommand = async (llamaServerUrl: string, commandEmbeddings: CommandEmbedding[], transcription: string): Promise<CommandType> => {
  if (transcription.length) {
    // Remove punctuation from the end
    transcription = transcription.replace(/[^\w\s]*$/, "");
    const transcriptionEmbedding = await llamaEmbed(llamaServerUrl, transcription);
    for (const command of commandEmbeddings) {
      // Score every phrase, recording the similarity on the phrase as
      // before, but track the best score directly instead of sorting the
      // caller's array in place (sorting reordered it as a side effect and
      // crashed on a command with no phrases when reading phrases[0]).
      let bestSimilarity = -Infinity;
      for (const phrase of command.phrases) {
        phrase.similarity = cosineSimilarity(phrase.embedding, transcriptionEmbedding);
        if (phrase.similarity > bestSimilarity) {
          bestSimilarity = phrase.similarity;
        }
      }
      if (bestSimilarity > command.threshold) {
        return command.name;
      }
    }
  }
  return 'continue';
}