fix: adding chatformat to use for inference servers #868
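Summary of the change set below: an optional properties map is added to ModelInfo, each entry is forwarded to the inference server container as a MODEL_* environment variable (camelCase keys become UPPER_SNAKE_CASE), three catalog models in ai.json gain "chatFormat": "openchat", and the playground image is bumped from 0.2.0 to 0.3.0.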

Merged · 9 commits · Apr 15, 2024
15 changes: 12 additions & 3 deletions packages/backend/src/assets/ai.json
@@ -104,7 +104,10 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/ibm/merlinite-7b-GGUF/resolve/main/merlinite-7b-Q4_K_M.gguf",
"memory": 4370129224
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
{
"id": "hf.TheBloke.mistral-7b-codealpaca-lora.Q4_K_M",
@@ -134,7 +137,10 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/froggeric/Cerebrum-1.0-7b-GGUF/resolve/main/Cerebrum-1.0-7b-Q4_KS.gguf",
"memory": 4144643441
"memory": 4144643441,
"properties": {
"chatFormat": "openchat"
}
},
{
"id": "hf.TheBloke.openchat-3.5-0106.Q4_K_M",
@@ -174,7 +180,10 @@
"registry": "Hugging Face",
"license": "Apache-2.0",
"url": "https://huggingface.co/llmware/dragon-mistral-7b-v0/resolve/main/dragon-mistral-7b-q4_k_m.gguf",
"memory": 4370129224
"memory": 4370129224,
"properties": {
"chatFormat": "openchat"
}
},
{
"id": "hf.MaziyarPanahi.MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M",
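Each chatFormat value set above reaches the server container as MODEL_CHAT_FORMAT (see inferenceUtils.ts below); presumably the playground image passes it on to the underlying llama.cpp server's chat-format option, though that wiring is outside this diff.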
@@ -309,7 +309,9 @@ describe('Create Inference Server', () => {
     );
     expect(taskRegistryMock.createTask).toHaveBeenNthCalledWith(
       1,
-      'Pulling ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.2.0.',
+      expect.stringContaining(
+        'Pulling ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:',
+      ),
       'loading',
       {
         trackingId: 'dummyTrackingId',
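The assertion is relaxed to expect.stringContaining so it matches the untagged image name only; with the tag bumped to 0.3.0 in inferenceUtils.ts below, this keeps the test from breaking on future version bumps.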
62 changes: 62 additions & 0 deletions packages/backend/src/utils/inferenceUtils.spec.ts
@@ -96,6 +96,68 @@ describe('generateContainerCreateOptions', () => {
       },
     });
   });
+
+  test('model info with chat_format properties', () => {
+    const result = generateContainerCreateOptions(
+      {
+        port: 8888,
+        providerId: 'test@providerId',
+        image: INFERENCE_SERVER_IMAGE,
+        modelsInfo: [
+          {
+            id: 'dummyModelId',
+            file: {
+              file: 'dummyFile',
+              path: 'dummyPath',
+            },
+            properties: {
+              chatFormat: 'dummyChatFormat',
+            },
+          },
+        ],
+      } as unknown as InferenceServerConfig,
+      {
+        Id: 'dummyImageId',
+        engineId: 'dummyEngineId',
+        RepoTags: [INFERENCE_SERVER_IMAGE],
+      } as unknown as ImageInfo,
+    );
+
+    expect(result.Env).toContain('MODEL_CHAT_FORMAT=dummyChatFormat');
+  });
+
+  test('model info with multiple properties', () => {
+    const result = generateContainerCreateOptions(
+      {
+        port: 8888,
+        providerId: 'test@providerId',
+        image: INFERENCE_SERVER_IMAGE,
+        modelsInfo: [
+          {
+            id: 'dummyModelId',
+            file: {
+              file: 'dummyFile',
+              path: 'dummyPath',
+            },
+            properties: {
+              basicProp: 'basicProp',
+              lotOfCamelCases: 'lotOfCamelCases',
+              lowercase: 'lowercase',
+            },
+          },
+        ],
+      } as unknown as InferenceServerConfig,
+      {
+        Id: 'dummyImageId',
+        engineId: 'dummyEngineId',
+        RepoTags: [INFERENCE_SERVER_IMAGE],
+      } as unknown as ImageInfo,
+    );
+
+    expect(result.Env).toContain('MODEL_BASIC_PROP=basicProp');
+    expect(result.Env).toContain('MODEL_LOT_OF_CAMEL_CASES=lotOfCamelCases');
+    expect(result.Env).toContain('MODEL_LOWERCASE=lowercase');
+  });
 });
 
 describe('withDefaultConfiguration', () => {
14 changes: 12 additions & 2 deletions packages/backend/src/utils/inferenceUtils.ts
@@ -34,7 +34,7 @@ export const SECOND: number = 1_000_000_000;
 export const LABEL_INFERENCE_SERVER: string = 'ai-lab-inference-server';
 
 export const INFERENCE_SERVER_IMAGE =
-  'ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.2.0';
+  'ghcr.io/containers/podman-desktop-extension-ai-lab-playground-images/ai-lab-playground-chat:0.3.0';
 
 /**
  * Return container connection provider
@@ -115,6 +115,16 @@ export function generateContainerCreateOptions(
     throw new Error('The model info file provided is undefined');
   }
 
+  const envs: string[] = [`MODEL_PATH=/models/${modelInfo.file.file}`, 'HOST=0.0.0.0', 'PORT=8000'];
+  if (modelInfo.properties) {
+    envs.push(
+      ...Object.entries(modelInfo.properties).map(([key, value]) => {
+        const formattedKey = key.replace(/[A-Z]/g, m => `_${m}`).toUpperCase();
+        return `MODEL_${formattedKey}=${value}`;
+      }),
+    );
+  }
+
   return {
     Image: imageInfo.Id,
     Detach: true,
@@ -147,7 +157,7 @@
       ...config.labels,
       [LABEL_INFERENCE_SERVER]: JSON.stringify(config.modelsInfo.map(model => model.id)),
     },
-    Env: [`MODEL_PATH=/models/${modelInfo.file.file}`, 'HOST=0.0.0.0', 'PORT=8000'],
+    Env: envs,
     Cmd: ['--models-path', '/models', '--context-size', '700', '--threads', '4'],
   };
 }
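For reference, the key-to-environment-variable conversion above can be exercised on its own. The sketch below mirrors the mapping logic added in generateContainerCreateOptions; the standalone propertyToEnv helper is illustrative and not part of this PR:

// Illustrative helper mirroring the mapping above: insert an underscore
// before each uppercase letter, then upper-case the whole key.
function propertyToEnv(key: string, value: string): string {
  const formattedKey = key.replace(/[A-Z]/g, m => `_${m}`).toUpperCase();
  return `MODEL_${formattedKey}=${value}`;
}

propertyToEnv('chatFormat', 'openchat'); // 'MODEL_CHAT_FORMAT=openchat'
propertyToEnv('lotOfCamelCases', 'x');   // 'MODEL_LOT_OF_CAMEL_CASES=x'
propertyToEnv('lowercase', 'y');         // 'MODEL_LOWERCASE=y'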
3 changes: 3 additions & 0 deletions packages/shared/src/models/IModelInfo.ts
@@ -29,4 +29,7 @@ export interface ModelInfo {
   file?: LocalModelInfo;
   state?: 'deleting';
   memory?: number;
+  properties?: {
+    [key: string]: string;
+  };
 }
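With the interface extended, a catalog entry can declare arbitrary string properties that the backend turns into MODEL_* environment variables. A minimal sketch, showing only fields visible in this diff, with values mirroring the ai.json entries above:

// Illustrative partial entry; ModelInfo has more fields than this hunk shows.
const merlinite: Partial<ModelInfo> = {
  memory: 4370129224,
  properties: {
    chatFormat: 'openchat', // becomes MODEL_CHAT_FORMAT=openchat in the container
  },
};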