Skip to content

Commit

Permalink
fix(chat): Improved 'chat with emails' response quality by sorting and filtering embeddings vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
andris9 committed Oct 2, 2023
1 parent cea30a5 commit de429d6
Show file tree
Hide file tree
Showing 9 changed files with 340 additions and 92 deletions.
2 changes: 1 addition & 1 deletion data/google-crawlers.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"creationTime": "2023-09-19T22:00:59.000000",
"creationTime": "2023-09-26T22:00:50.000000",
"prefixes": [
{
"ipv6Prefix": "2001:4860:4801:2008::/64"
Expand Down
359 changes: 280 additions & 79 deletions lib/api-routes/chat-routes.js

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions lib/es.js
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,9 @@ const embeddingsMappings = {
chunks: {
type: 'integer'
},
date: {
type: 'date'
},
created: {
type: 'date'
}
Expand Down
6 changes: 4 additions & 2 deletions lib/mailbox.js
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,8 @@ class Mailbox {
subject: messageInfo.subject,
text: messageInfo.text.plain,
html: messageInfo.text.html
}
},
account: this.connection.account
},
timeout: 2 * 60 * 1000
});
Expand Down Expand Up @@ -1150,7 +1151,8 @@ class Mailbox {
subject: messageInfo.subject,
text: messageInfo.text.plain,
html: messageInfo.text.html
}
},
account: this.connection.account
},
timeout: 2 * 60 * 1000
});
Expand Down
12 changes: 6 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"main": "server.js",
"scripts": {
"start": "node server.js",
"dev": "EENGINE_LOG_RAW=true node --tls-keylog=keylog.txt server --dbs.redis='redis://127.0.0.1:6379/9' --api.port=7003 --api.host=0.0.0.0 | tee $HOME/dev.txt | pino-pretty",
"single": "EENGINE_LOG_RAW=false EENGINE_WORKERS=1 node --inspect server --dbs.redis='redis://127.0.0.1:6379/10' --api.port=7009 --api.host=0.0.0.0 | tee $HOME/log.txt | pino-pretty",
"dev": "EE_OPENAPI_VERBOSE=true EENGINE_LOG_RAW=true node --tls-keylog=keylog.txt server --dbs.redis='redis://127.0.0.1:6379/9' --api.port=7003 --api.host=0.0.0.0 | tee $HOME/dev.txt | pino-pretty",
"single": "EE_OPENAPI_VERBOSE=true EENGINE_LOG_RAW=false EENGINE_WORKERS=1 node --inspect server --dbs.redis='redis://127.0.0.1:6379/10' --api.port=7009 --api.host=0.0.0.0 | tee $HOME/log.txt | pino-pretty",
"raw": "NODE_OPTIONS=--no-experimental-fetch EENGINE_LOG_RAW=true node server --dbs.redis='redis://127.0.0.1:6379/9' | eerawlog",
"test": "grunt && node --test test/",
"swagger": "./getswagger.sh",
Expand Down Expand Up @@ -50,14 +50,14 @@
"@hapi/vision": "7.0.3",
"@phc/pbkdf2": "1.1.14",
"@postalsys/certs": "1.0.5",
"@postalsys/email-ai-tools": "1.5.1",
"@postalsys/email-ai-tools": "1.6.1",
"@postalsys/email-text-tools": "2.1.1",
"@postalsys/hecks": "3.0.0-fork.3",
"@postalsys/templates": "1.0.5",
"ace-builds": "1.28.0",
"base32.js": "0.1.0",
"bull-arena": "4.0.1",
"bullmq": "4.11.4",
"bullmq": "4.12.0",
"compare-versions": "6.1.0",
"dotenv": "16.3.1",
"encoding-japanese": "2.0.0",
Expand Down Expand Up @@ -93,7 +93,7 @@
"nanoid": "3.3.4",
"node-gettext": "3.0.0",
"nodemailer": "6.9.5",
"pino": "8.15.1",
"pino": "8.15.3",
"prom-client": "14.2.0",
"psl": "1.9.0",
"pubface": "1.0.6",
Expand All @@ -104,7 +104,7 @@
"speakeasy": "2.0.0",
"startbootstrap-sb-admin-2": "3.3.7",
"timezones-list": "3.0.2",
"undici": "5.25.2",
"undici": "5.25.3",
"uuid": "9.0.1",
"wild-config": "1.7.0",
"xml2js": "0.6.2"
Expand Down
42 changes: 41 additions & 1 deletion server.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ const {
generateEmbeddings,
getChunkEmbeddings,
embeddingsQuery,
questionQuery,
DEFAULT_USER_PROMPT: openAiDefaultPrompt
} = require('@postalsys/email-ai-tools');
const { fetch: fetchCmd, Agent } = require('undici');
Expand Down Expand Up @@ -1415,6 +1416,8 @@ async function onCommand(worker, message) {
break;
}

requestOpts.user = message.data.account;

let userPrompt = message.data.openAiPrompt || ((await settings.get('openAiPrompt')) || '').toString();
if (userPrompt.trim()) {
requestOpts.userPrompt = userPrompt;
Expand All @@ -1435,6 +1438,8 @@ async function onCommand(worker, message) {
throw new Error(`OpenAI API key is not set`);
}

requestOpts.user = message.data.account;

const embeddings = await generateEmbeddings(message.data.message, openAiAPIKey, requestOpts);
if (!Array.isArray(embeddings?.embeddings)) {
return false;
Expand Down Expand Up @@ -1478,8 +1483,12 @@ async function onCommand(worker, message) {
break;
}

requestOpts.user = message.data.account;
requestOpts.temperature = 0.4;

requestOpts.question = message.data.question;
requestOpts.contextChunks = message.data.contextChunks;
requestOpts.userData = message.data.userData;

let response = await embeddingsQuery(openAiAPIKey, requestOpts);

Expand All @@ -1488,7 +1497,7 @@ async function onCommand(worker, message) {
delete response?.['Message-ID'];
}
if (response?.messageId) {
response.messageId = (response?.messageId || '').toString().trim().replace(/^<?/, '<').replace(/>?$/, '>');
response.messageId = [].concat(response?.messageId || []).map(value => (value || '').toString().trim().replace(/^<?/, '<').replace(/>?$/, '>'));
}

if (response?.answer) {
Expand All @@ -1508,6 +1517,35 @@ async function onCommand(worker, message) {
return response;
}

case 'questionQuery': {
let requestOpts = {
verbose: getBoolean(process.env.EE_OPENAPI_VERBOSE)
};

let openAiAPIKey = message.data.openAiAPIKey || (await settings.get('openAiAPIKey'));

if (!openAiAPIKey) {
throw new Error(`OpenAI API key is not set`);
}

let openAiModel = message.data.openAiModel || 'gpt-3.5-turbo-instruct';
if (openAiModel) {
requestOpts.gptModel = openAiModel;
}

requestOpts.user = message.data.account;

let response = await questionQuery(message.data.question, openAiAPIKey, requestOpts);

for (const key of Object.keys(response)) {
if (/^_/.test(key)) {
delete response[key];
}
}

return response;
}

// run these in main process to avoid polluting RAM with the memory hungry tokenization library
case 'generateChunkEmbeddings': {
let requestOpts = {
Expand All @@ -1520,6 +1558,8 @@ async function onCommand(worker, message) {
throw new Error(`OpenAI API key is not set`);
}

requestOpts.user = message.data.account;

const data = await getChunkEmbeddings(message.data.message, openAiAPIKey, requestOpts);

return data;
Expand Down
2 changes: 1 addition & 1 deletion static/js/ace/ace.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion static/js/ace/ext-language_tools.js

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion workers/documents.js
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,8 @@ const documentsWorker = new Worker(
subject: messageData.subject,
text: messageData.text.plain,
html: messageData.text.html
}
},
account: job.data.account
},
timeout: 5 * 60 * 1000
});
Expand All @@ -530,6 +531,7 @@ const documentsWorker = new Worker(
model: embeddings.model,
chunkNr: i,
chunks: embeddings.embeddings.length,
date: messageData.date,
created: new Date()
}));

Expand Down

0 comments on commit de429d6

Please sign in to comment.