Skip to content

Commit

Permalink
fix(inference): ensure the generated YAML matches the correct resources (#93
Browse files Browse the repository at this point in the history
)
  • Loading branch information
kaysonyu authored Sep 9, 2024
1 parent 67ba019 commit cf03ae1
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 14 deletions.
2 changes: 1 addition & 1 deletion modules/inference/example/dev/example_workspace.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
modules:
inference:
path: oci://ghcr.io/kusionstack/inference
version: 0.1.0-beta.2
version: 0.1.0-beta.3
configs:
default: {}
2 changes: 1 addition & 1 deletion modules/inference/example/dev/kcl.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "example"

[dependencies]
inference = { oci = "oci://ghcr.io/kusionstack/inference", tag = "0.1.0-beta.2" }
inference = { oci = "oci://ghcr.io/kusionstack/inference", tag = "0.1.0-beta.3" }
service = {oci = "oci://ghcr.io/kusionstack/service", tag = "0.1.0" }
kam = { git = "https://github.com/KusionStack/kam.git", tag = "0.2.0" }

Expand Down
2 changes: 1 addition & 1 deletion modules/inference/kcl.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[package]
name = "inference"
version = "0.1.0-beta.2"
version = "0.1.0-beta.3"
2 changes: 1 addition & 1 deletion modules/inference/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ TEST?=$$(go list ./... | grep -v 'vendor')
###### change variables below according to your own modules ###
NAMESPACE=kusionstack
NAME=inference
VERSION=0.1.0-beta.2
VERSION=0.1.0-beta.3
BINARY=../bin/kusion-module-${NAME}_${VERSION}

LOCAL_ARCH := $(shell uname -m)
Expand Down
35 changes: 25 additions & 10 deletions modules/inference/src/inference_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"kusionstack.io/kusion-module-framework/pkg/module"
"kusionstack.io/kusion-module-framework/pkg/server"
apiv1 "kusionstack.io/kusion/pkg/apis/api.kusion.io/v1"
Expand All @@ -29,6 +30,8 @@ var (
inferDeploymentSuffix = "-infer-deployment"
inferStorageSuffix = "-infer-storage"
inferServiceSuffix = "-infer-service"
inferPortSuffix = "-port"
inferContainerSuffix = "-infer-container"
)

var (
Expand All @@ -39,12 +42,17 @@ var (
defaultNumCtx int = 2048
)

var (
CalledPort = 80
OllamaPort = 11434
)

var (
OllamaType = "ollama"
)

var (
OllamaImage = "ollama"
OllamaImage = "ollama/ollama"
)

func main() {
Expand Down Expand Up @@ -232,18 +240,21 @@ func (infer *Inference) generatePodSpec(_ *module.GeneratorRequest) (v1.PodSpec,

var commandParts []string
commandParts = append(commandParts, fmt.Sprintf("echo %s > Modelfile", builder.String()))
commandParts = append(commandParts, "ollama serve & OLLAMA_SERVE_PID=$!")
commandParts = append(commandParts, "sleep 5")
commandParts = append(commandParts, fmt.Sprintf("ollama create %s -f Modelfile", infer.Model))
commandParts = append(commandParts, "wait $OLLAMA_SERVE_PID")

modelPullCmd = append(modelPullCmd, "/bin/sh", "-c", strings.Join(commandParts, " && "))
containerPort = 11434
containerPort = int32(OllamaPort)
default:
}

image := OllamaImage

volumes := []v1.Volume{
{
Name: infer.Framework + inferStorageSuffix,
Name: strings.ToLower(infer.Framework) + inferStorageSuffix,
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{},
},
Expand All @@ -252,22 +263,22 @@ func (infer *Inference) generatePodSpec(_ *module.GeneratorRequest) (v1.PodSpec,

volumeMounts := []v1.VolumeMount{
{
Name: infer.Framework + inferStorageSuffix,
Name: strings.ToLower(infer.Framework) + inferStorageSuffix,
MountPath: mountPath,
},
}

ports := []v1.ContainerPort{
{
Name: infer.Framework,
Name: strings.ToLower(infer.Framework) + inferPortSuffix,
ContainerPort: containerPort,
},
}

podSpec := v1.PodSpec{
Containers: []v1.Container{
{
Name: infer.Framework,
Name: strings.ToLower(infer.Framework) + inferContainerSuffix,
Image: image,
Ports: ports,
Command: modelPullCmd,
Expand All @@ -294,7 +305,7 @@ func (infer *Inference) generateDeployment(request *module.GeneratorRequest) (*a
APIVersion: appsv1.SchemeGroupVersion.String(),
},
ObjectMeta: metav1.ObjectMeta{
Name: infer.Framework + inferDeploymentSuffix,
Name: strings.ToLower(infer.Framework) + inferDeploymentSuffix,
Namespace: request.Project,
},
Spec: appsv1.DeploymentSpec{
Expand Down Expand Up @@ -322,10 +333,14 @@ func (infer *Inference) generateDeployment(request *module.GeneratorRequest) (*a
// generateService generates the Kubernetes Service resource for the Inference instance.
func (infer *Inference) generateService(request *module.GeneratorRequest) (*apiv1.Resource, string, error) {
// Prepare the service port for the Inference instance.
svcName := infer.Framework + inferServiceSuffix
svcName := strings.ToLower(infer.Framework) + inferServiceSuffix
svcPort := []v1.ServicePort{
{
Port: int32(80),
Port: int32(CalledPort),
TargetPort: intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(OllamaPort),
},
},
}

Expand Down Expand Up @@ -359,6 +374,6 @@ func (infer *Inference) generateService(request *module.GeneratorRequest) (*apiv
// generateMatchLabels generates the match labels for the Kubernetes resources of the Inference instance.
func (infer *Inference) generateMatchLabels() map[string]string {
return map[string]string{
"accessory": infer.Framework,
"accessory": strings.ToLower(infer.Framework),
}
}

0 comments on commit cf03ae1

Please sign in to comment.