diff --git a/.changeset/README.md b/.changeset/README.md new file mode 100644 index 0000000..e5b6d8d --- /dev/null +++ b/.changeset/README.md @@ -0,0 +1,8 @@ +# Changesets + +Hello and welcome! This folder has been automatically generated by `@changesets/cli`, a build tool that works +with multi-package repos, or single-package repos to help you version and publish your code. You can +find the full documentation for it [in our repository](https://github.com/changesets/changesets) + +We have a quick list of common questions to get you started engaging with this project in +[our documentation](https://github.com/changesets/changesets/blob/main/docs/common-questions.md) diff --git a/.changeset/config.json b/.changeset/config.json new file mode 100644 index 0000000..5b42be8 --- /dev/null +++ b/.changeset/config.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://unpkg.com/@changesets/config@3.0.3/schema.json", + "changelog": "@changesets/cli/changelog", + "commit": true, + "fixed": [], + "linked": [], + "access": "public", + "baseBranch": "master", + "updateInternalDependencies": "patch", + "ignore": [] +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..84c1263 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.DS_Store +node_modules +.turbo +*.log +.next +dist +dist-ssr +*.local +.env +.cache +server/dist +public/dist +.turbo +test-results \ No newline at end of file diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..60ddfd0 --- /dev/null +++ b/.npmrc @@ -0,0 +1,2 @@ +auto-install-peers = true +strict-peer-dependencies = false diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..c9ae50d --- /dev/null +++ b/.prettierrc @@ -0,0 +1,7 @@ +{ + "semi": true, + "singleQuote": true, + "trailingComma": "all", + "printWidth": 80, + "tabWidth": 2 +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..dfe5686 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +# voyage-ai-provider + +## 0.0.1 + +### 
Patch Changes + +- First public release diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..51fca54 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,11 @@ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/bun.lockb b/bun.lockb new file mode 100644 index 0000000..f8c2a9a Binary files /dev/null and b/bun.lockb differ diff --git a/package.json b/package.json new file mode 100644 index 0000000..9701207 --- /dev/null +++ b/package.json @@ -0,0 +1,78 @@ +{ + "name": "voyage-ai-provider", + "version": "0.0.1", + "description": "Voyage AI Provider for running Voyage AI models with Vercel AI SDK", + "author": "Vivek Patel ", + "license": "Apache-2.0", + "keywords": [ + "ai", + "vercel-ai", + "voyage", + "embeddings" + ], + "main": "./dist/index.js", + "module": "./dist/index.mjs", + "types": "./dist/index.d.ts", + "files": [ + "dist/**/*", + "CHANGELOG.md" + ], + "scripts": { + "build": "tsup", + "clean": "rm -rf dist", + "check-exports": "attw --pack .", + "dev": "tsup --watch", + "lint": "eslint \"./**/*.ts*\"", + "type-check": "tsc --noEmit", + "format": "prettier --write .", + "check-format": "prettier --check .", + "test": "bun test:node && bun test:edge", + "test:edge": "vitest --config vitest.edge.config.js --run", + "test:node": "vitest --config vitest.node.config.js --run", + "ci": "bun run build && bun run check-format && bun run test", + "local-release": "changeset version && changeset publish", + "prepublishOnly": "npm run ci" + }, + "exports": { + 
"./package.json": "./package.json", + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.mjs", + "require": "./dist/index.js" + } + }, + "dependencies": { + "@ai-sdk/provider": "^1.0.0", + "@ai-sdk/provider-utils": "^2.0.0" + }, + "devDependencies": { + "@changesets/cli": "^2.27.9", + "@edge-runtime/vm": "^3.2.0", + "@types/node": "^18.19.64", + "prettier": "^3.3.3", + "tsup": "^8.3.5", + "typescript": "5.5.4", + "vite-tsconfig-paths": "^4.3.2", + "vitest": "^2.1.5", + "zod": "^3.23.8" + }, + "peerDependencies": { + "zod": "^3.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + }, + "publishConfig": { + "access": "public" + }, + "homepage": "https://github.com/patelvivekdev/voyage-ai-provider", + "repository": { + "type": "git", + "url": "git+https://github.com/patelvivekdev/voyage-ai-provider.git" + }, + "bugs": { + "url": "https://github.com/patelvivekdev/voyage-ai-provider/issues" + } +} diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..2173da6 --- /dev/null +++ b/readme.md @@ -0,0 +1,83 @@ +# AI SDK - Voyage AI Provider + +## Introduction + +The Voyage AI Provider is a provider for the AI SDK. It provides a simple interface to the Voyage AI API. + +## Installation + +```bash +npm install voyage-ai-provider + +# or + +yarn add voyage-ai-provider + +# or + +pnpm add voyage-ai-provider + +# or + +bun add voyage-ai-provider +``` + +## Configuration + +The Voyage AI Provider requires an API key to be configured. You can obtain an API key by signing up at [Voyage](https://voyageai.com). 
+ +Add the following to your `.env` file: + +```bash +VOYAGE_API_KEY=your-api-key +``` + +## Usage + +```typescript +import { voyage } from 'voyage-ai-provider'; +import { embedMany } from 'ai'; + +const embeddingModel = voyage.textEmbeddingModel('voyage-3-lite'); + +export const generateEmbeddings = async ( + value: string, +): Promise<Array<{ content: string; embedding: number[] }>> => { + // Generate chunks from the input value + const chunks = value.split('\n'); + + // Optional: You can also split the input value by period + // const chunks = value.split('.'); + + // Or you can use an LLM to generate chunks (e.g. summaries) from the input value + + const { embeddings } = await embedMany({ + model: embeddingModel, + values: chunks, + }); + return embeddings.map((e, i) => ({ content: chunks[i], embedding: e })); +}; +``` + +### Add settings to the model + +The settings object should contain the settings you want to add to the model. You can find the available settings for the model in the Voyage API documentation: https://docs.voyageai.com/reference/embeddings-api + +```typescript +const voyage = createVoyage({ + apiKey: process.env.VOYAGE_API_KEY, +}); + +// Initialize the embedding model +const embeddingModel = voyage.textEmbeddingModel( + 'voyage-3-lite', + // adding settings + { + inputType: 'document', + }, +); +``` + +## Authors + +- [patelvivekdev](https://patelvivek.dev) diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..41f2a26 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,2 @@ +export { createVoyage, voyage } from './voyage-provider'; +export type { VoyageProvider, VoyageProviderSettings } from './voyage-provider'; diff --git a/src/voyage-embedding-model.test.ts b/src/voyage-embedding-model.test.ts new file mode 100644 index 0000000..b1cce88 --- /dev/null +++ b/src/voyage-embedding-model.test.ts @@ -0,0 +1,130 @@ +import { EmbeddingModelV1Embedding } from '@ai-sdk/provider'; +import { JsonTestServer } from '@ai-sdk/provider-utils/test'; +import { createVoyage } from './voyage-provider'; 
+ +const dummyEmbeddings = [ + [0.1, 0.2, 0.3, 0.4, 0.5], + [0.6, 0.7, 0.8, 0.9, 1], +]; +const testValues = ['sunny day at the beach', 'rainy day in the city']; + +const provider = createVoyage({ + baseURL: 'https://api.voyage.ai/v1', + apiKey: 'test-api-key', +}); +const model = provider('voyage-3-lite'); + +describe('doEmbed', () => { + const server = new JsonTestServer('https://api.voyage.ai/v1/embeddings'); + + server.setupTestEnvironment(); + + function prepareJsonResponse({ + embeddings = dummyEmbeddings, + usage = { + prompt_tokens: 4, + total_tokens: 12, + }, + }: { + embeddings?: EmbeddingModelV1Embedding[]; + usage?: { prompt_tokens: number; total_tokens: number }; + } = {}) { + server.responseBodyJson = { + object: 'list', + data: embeddings.map((embedding, i) => ({ + object: 'embedding', + embedding, + index: i, + })), + model: 'voyage-3-lite', + normalized: true, + encoding_format: 'float', + usage, + }; + } + + it('should extract embedding', async () => { + prepareJsonResponse(); + + const { embeddings } = await model.doEmbed({ values: testValues }); + + expect(embeddings).toStrictEqual(dummyEmbeddings); + }); + + it('should expose the raw response headers', async () => { + prepareJsonResponse(); + + server.responseHeaders = { + 'test-header': 'test-value', + }; + + const { rawResponse } = await model.doEmbed({ values: testValues }); + + expect(rawResponse?.headers).toStrictEqual({ + 'content-length': '272', + // default headers: + 'content-type': 'application/json', + + // custom header + 'test-header': 'test-value', + }); + }); + + it('should pass the model and the values', async () => { + prepareJsonResponse(); + + await model.doEmbed({ values: testValues }); + + expect(await server.getRequestBodyJson()).toStrictEqual({ + input: testValues, + model: 'voyage-3-lite', + }); + }); + + it('should pass the settings ', async () => { + prepareJsonResponse(); + + const voyage = createVoyage({ + baseURL: 'https://api.voyage.ai/v1', + apiKey: 
import {
  type EmbeddingModelV1,
  TooManyEmbeddingValuesForCallError,
} from '@ai-sdk/provider';
import {
  combineHeaders,
  createJsonResponseHandler,
  postJsonToApi,
} from '@ai-sdk/provider-utils';
import { z } from 'zod';

import type {
  VoyageEmbeddingModelId,
  VoyageEmbeddingSettings,
} from '@/voyage-embedding-settings';
import { voyageFailedResponseHandler } from '@/voyage-error';

/** Wiring handed to every model instance by the provider factory. */
type VoyageEmbeddingConfig = {
  /** URL prefix; `/embeddings` is appended for each request. */
  baseURL: string;
  /** Optional fetch override, forwarded unchanged to `postJsonToApi`. */
  fetch?: typeof fetch;
  /** Called once per `doEmbed` invocation to produce the base request headers. */
  headers: () => Record<string, string | undefined>;
  /** Identifier exposed through the `provider` getter. */
  provider: string;
};

/**
 * Embedding model that posts text values to `${baseURL}/embeddings` and
 * returns the resulting vectors.
 */
export class VoyageEmbeddingModel implements EmbeddingModelV1<string> {
  readonly specificationVersion = 'v1';
  readonly modelId: VoyageEmbeddingModelId;

  private readonly config: VoyageEmbeddingConfig;
  private readonly settings: VoyageEmbeddingSettings;

  /** Provider identifier taken from the injected config. */
  get provider(): string {
    return this.config.provider;
  }

  /** Upper bound on `values.length` accepted by a single `doEmbed` call. */
  get maxEmbeddingsPerCall(): number {
    return 128;
  }

  /** This model reports that it does not support parallel calls. */
  get supportsParallelCalls(): boolean {
    return false;
  }

  /**
   * @param modelId - Model name sent as `model` in the request body.
   * @param settings - Per-model options mapped onto `input_type`,
   *   `encoding_format` and `truncation`.
   * @param config - Base URL, header factory, provider id and optional fetch.
   */
  constructor(
    modelId: VoyageEmbeddingModelId,
    settings: VoyageEmbeddingSettings,
    config: VoyageEmbeddingConfig,
  ) {
    this.modelId = modelId;
    this.settings = settings;
    this.config = config;
  }

  /**
   * Embeds `values` with a single POST request.
   *
   * @returns The embeddings in response order, the reported `total_tokens`
   *   (when the response carries a `usage` object) and the raw response headers.
   * @throws TooManyEmbeddingValuesForCallError when `values.length` exceeds
   *   `maxEmbeddingsPerCall`.
   */
  async doEmbed({
    abortSignal,
    headers,
    values,
  }: Parameters<EmbeddingModelV1<string>['doEmbed']>[0]): Promise<
    Awaited<ReturnType<EmbeddingModelV1<string>['doEmbed']>>
  > {
    if (values.length > this.maxEmbeddingsPerCall) {
      throw new TooManyEmbeddingValuesForCallError({
        maxEmbeddingsPerCall: this.maxEmbeddingsPerCall,
        modelId: this.modelId,
        provider: this.provider,
        values,
      });
    }

    const { responseHeaders, value: response } = await postJsonToApi({
      abortSignal,
      // Unset settings are `undefined` and therefore omitted from the JSON body.
      body: {
        input: values,
        model: this.modelId,
        input_type: this.settings.inputType,
        encoding_format: this.settings.encodingFormat,
        truncation: this.settings.truncation,
      },
      failedResponseHandler: voyageFailedResponseHandler,
      fetch: this.config.fetch,
      // Per-call headers are layered on top of the provider-level ones so that
      // headers passed by the caller for this request are not dropped.
      headers: combineHeaders(this.config.headers(), headers),
      successfulResponseHandler: createJsonResponseHandler(
        voyageTextEmbeddingResponseSchema,
      ),
      url: `${this.config.baseURL}/embeddings`,
    });

    return {
      embeddings: response.data.map((item) => item.embedding),
      usage: response.usage
        ? { tokens: response.usage.total_tokens }
        : undefined,
      rawResponse: { headers: responseHeaders },
    };
  }
}

// Minimal version of the schema, focused on what the implementation reads.
// Validating only these fields limits breakage when the API adds or changes others.
// NOTE(review): `embedding` is validated as an array of numbers only; confirm
// how responses look when `encodingFormat: 'base64'` is set, since a
// non-array payload would be rejected here.
const voyageTextEmbeddingResponseSchema = z.object({
  data: z.array(z.object({ embedding: z.array(z.number()) })),
  usage: z.object({ total_tokens: z.number() }).nullish(),
});
import { createJsonErrorResponseHandler } from '@ai-sdk/provider-utils';
import { z } from 'zod';

// Shape that a failed response body is validated against.
// NOTE(review): confirm against real Voyage error responses that failures
// arrive wrapped in an `error` object carrying these fields.
const voyageErrorDataSchema = z.object({
  error: z.object({
    code: z.string().nullable(),
    message: z.string(),
    param: z.any().nullable(),
    type: z.string(),
  }),
});

/** Parsed error payload, derived from the schema so the two cannot drift apart. */
export type VoyageErrorData = z.infer<typeof voyageErrorDataSchema>;

/**
 * Failed-response handler for Voyage requests: validates the body with
 * `voyageErrorDataSchema` and uses `error.message` as the error message.
 */
export const voyageFailedResponseHandler = createJsonErrorResponseHandler({
  errorSchema: voyageErrorDataSchema,
  errorToMessage: (data) => data.error.message,
});
/**
 * Builds a Voyage AI provider.
 *
 * The returned value is callable (`provider(modelId, settings)`) and also
 * exposes `embedding`, `textEmbedding` and `textEmbeddingModel`, all of which
 * create a `VoyageEmbeddingModel`. `languageModel` and `chat` always throw.
 *
 * @param options - Optional base URL, API key, extra headers and fetch override.
 * @returns The configured provider.
 */
export function createVoyage(
  options: VoyageProviderSettings = {},
): VoyageProvider {
  // Use the caller's prefix (minus any trailing slash) when one is given,
  // otherwise the public endpoint.
  const normalizedBaseURL = withoutTrailingSlash(options.baseURL);
  const apiPrefix = normalizedBaseURL ?? 'https://api.voyageai.com/v1';

  // The key is resolved inside this callback rather than up front, so simply
  // creating a provider does not require a key to be available yet.
  const buildHeaders = () => {
    const apiKey = loadApiKey({
      apiKey: options.apiKey,
      environmentVariableName: 'VOYAGE_API_KEY',
      description: 'Voyage',
    });

    // Spread last: caller-supplied headers win over the generated one.
    return { Authorization: `Bearer ${apiKey}`, ...options.headers };
  };

  const makeEmbeddingModel = (
    modelId: VoyageEmbeddingModelId,
    settings: VoyageEmbeddingSettings = {},
  ) => {
    const modelConfig = {
      provider: 'voyage.embedding',
      baseURL: apiPrefix,
      headers: buildHeaders,
      fetch: options.fetch,
    };

    return new VoyageEmbeddingModel(modelId, settings, modelConfig);
  };

  // A plain function expression (not an arrow) so `new.target` is available.
  const provider = function (
    modelId: VoyageEmbeddingModelId,
    settings?: VoyageEmbeddingSettings,
  ) {
    if (!new.target) {
      return makeEmbeddingModel(modelId, settings);
    }

    throw new Error(
      'The Voyage model function cannot be called with the new keyword.',
    );
  };

  provider.embedding = makeEmbeddingModel;
  provider.textEmbedding = makeEmbeddingModel;
  provider.textEmbeddingModel = makeEmbeddingModel;

  // Only embeddings are offered; both language-model entry points share one
  // function that always throws.
  provider.languageModel = (modelId: string): LanguageModelV1 => {
    throw new Error('languageModel method is not implemented.');
  };
  provider.chat = provider.languageModel;

  return provider as VoyageProvider;
}
+ */ +export const voyage = createVoyage(); diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..afc64e8 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,29 @@ +{ + "$schema": "https://json.schemastore.org/tsconfig", + "compilerOptions": { + "composite": false, + "declaration": true, + "declarationMap": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "inlineSources": false, + "isolatedModules": true, + "moduleResolution": "node", + "noUnusedLocals": false, + "noUnusedParameters": false, + "preserveWatchOutput": true, + "skipLibCheck": true, + "strict": true, + "types": ["@types/node", "vitest/globals"], + "jsx": "react-jsx", + "lib": ["dom", "ES2021"], + "module": "ESNext", + "target": "ES2018", + "stripInternal": true, + "paths": { + "@/*": ["./src/*"] + } + }, + "include": ["."], + "exclude": ["dist", "build", "node_modules"] +} diff --git a/tsup.config.ts b/tsup.config.ts new file mode 100644 index 0000000..fa5392f --- /dev/null +++ b/tsup.config.ts @@ -0,0 +1,11 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig([ + { + entry: ['src/index.ts'], + format: ['cjs', 'esm'], + dts: true, + sourcemap: true, + clean: true, + }, +]); diff --git a/vitest.edge.config.js b/vitest.edge.config.js new file mode 100644 index 0000000..9c43c68 --- /dev/null +++ b/vitest.edge.config.js @@ -0,0 +1,12 @@ +import tsconfigPaths from 'vite-tsconfig-paths'; +import { defineConfig } from 'vitest/config'; + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [tsconfigPaths()], + test: { + environment: 'edge-runtime', + globals: true, + include: ['**/*.test.ts', '**/*.test.tsx'], + }, +}); diff --git a/vitest.node.config.js b/vitest.node.config.js new file mode 100644 index 0000000..585f6b1 --- /dev/null +++ b/vitest.node.config.js @@ -0,0 +1,12 @@ +import tsconfigPaths from 'vite-tsconfig-paths'; +import { defineConfig } from 'vitest/config'; + +// https://vitejs.dev/config/ +export default 
defineConfig({ + plugins: [tsconfigPaths()], + test: { + environment: 'node', + globals: true, + include: ['**/*.test.ts', '**/*.test.tsx'], + }, +});