Skip to content

Commit

Permalink
Use lite version of @dqbd/tiktoken
Browse files Browse the repository at this point in the history
  • Loading branch information
dqbd committed May 10, 2023
1 parent a2f6860 commit 0392600
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
25 changes: 22 additions & 3 deletions langchain/src/base_language/count_tokens.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,26 @@ interface CalculateMaxTokenProps {
modelName: TiktokenModel;
}

- export const importTiktoken = async () => {
+ export const importTiktoken = /* @__PURE__ */ async () => {
  try {
- const { encoding_for_model } = await import("@dqbd/tiktoken");
+ const [{ Tiktoken }, { load }, { default: registry }, { default: models }] =
+ await Promise.all([
+ import("@dqbd/tiktoken/lite"),
+ import("@dqbd/tiktoken/load"),
+ import("@dqbd/tiktoken/registry.json"),
+ import("@dqbd/tiktoken/model_to_encoding.json"),
+ ]);
+
+ const encoding_for_model = async (modelName: TiktokenModel) => {
+ const model = await load(
+ registry[
+ models[modelName as keyof typeof models] as keyof typeof registry
+ ]
+ );
+
+ return new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
+ };
+
return { encoding_for_model };
} catch (error) {
console.log(error);
Expand All @@ -78,7 +95,9 @@ export const calculateMaxTokens = async ({

try {
if (encoding_for_model) {
- const encoding = encoding_for_model(getModelNameForTiktoken(modelName));
+ const encoding = await encoding_for_model(
+ getModelNameForTiktoken(modelName)
+ );

const tokenized = encoding.encode(prompt);

Expand Down
2 changes: 1 addition & 1 deletion langchain/src/base_language/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ export abstract class BaseLanguageModel
// modelName only exists in openai subclasses, but tiktoken only supports
// openai tokenisers anyway, so for other subclasses we default to gpt2
if (encoding_for_model) {
- this._encoding = encoding_for_model(
+ this._encoding = await encoding_for_model(
"modelName" in this
? getModelNameForTiktoken(this.modelName as string)
: "gpt2"
Expand Down
1 change: 1 addition & 0 deletions langchain/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
  "noUnusedParameters": true,
  "useDefineForClassFields": true,
  "strictPropertyInitialization": false,
+ "resolveJsonModule": true,
"allowJs": true,
"strict": true
},
Expand Down

0 comments on commit 0392600

Please sign in to comment.