Commit e694cbf

Diffusion model support using BentoML (#610)
* Integrate OCI cache with Data science hosted MCP server
* Readme updates
* Diffusion model support in model deployment
* context
1 parent 79db059 commit e694cbf

File tree

4 files changed: +220 −0 lines changed
Lines changed: 9 additions & 0 deletions (Dockerfile)
FROM python:3.13-slim
RUN apt-get update -y
RUN apt-get install -y curl cmake
RUN apt-get install -y build-essential
RUN python3 -m pip install --upgrade pip
RUN pip install --upgrade torch transformers diffusers accelerate
RUN pip install bentoml Pillow protobuf peft sentencepiece oci
WORKDIR /opt/ds/model/deployed_model
CMD ["bentoml", "serve"]
Lines changed: 44 additions & 0 deletions (README)
# Introduction

Diffusion models are a class of generative models that learn to create new data samples by reversing a gradual noising process. During training, noise is progressively added to real data points until they become pure noise, and a neural network learns to reverse this process step by step, effectively generating data from noise.
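
To make the forward (noising) half of that description concrete, here is a minimal, illustrative sketch; it is not part of this sample, and `add_noise` and `alpha_bar` are hypothetical names:

```
import torch

def add_noise(x0: torch.Tensor, t: int, alpha_bar: torch.Tensor) -> torch.Tensor:
    # Blend the clean sample x0 with Gaussian noise according to the
    # cumulative noise schedule alpha_bar at timestep t: q(x_t | x_0).
    noise = torch.randn_like(x0)
    return alpha_bar[t].sqrt() * x0 + (1.0 - alpha_bar[t]).sqrt() * noise
```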

[BentoML](https://github.com/bentoml/BentoML) is a Python library for building online serving systems optimized for AI apps and model inference. What sets it apart from other text generation frameworks is that it also supports image generation use cases such as Stable Diffusion 3 Medium, Stable Video Diffusion, Stable Diffusion XL Turbo, ControlNet, and LCM LoRAs.

In this sample, we deploy [Stable Diffusion 3 Medium](https://github.com/bentoml/BentoDiffusion/tree/main/sd3-medium) with BentoML.

# Steps

## Dockerize

First, let's dockerize our model serving framework using the [Dockerfile](./Dockerfile).

```
docker build -f Dockerfile -t bentoml:latest .
```

Push the built image to the OCI Registry (OCIR) so that the model deployment service can pull it.

## Create BentoML framework API code to serve Stable Diffusion 3 Medium

Refer to the code in the [sd3-medium directory](./sd3-medium).
Note the changes made to support this on OCI Data Science Model Deployment (a skeleton follows this list):

* Add readiness logic, if needed, for checking the health of the model server.
* Add a route in the BentoML API to expose a `predict` endpoint for image generation.
* Use the OCI Object Storage integration with a resource principal to put the generated images in a bucket of your choice.
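
A rough skeleton of those changes, assuming a recent BentoML release (the full service code is in the sd3-medium file at the end of this commit):

```
import bentoml

@bentoml.service
class MyService:
    # Readiness probe polled through MODEL_DEPLOY_HEALTH_ENDPOINT
    def __is_ready__(self) -> bool:
        return True

    # Expose image generation on the /predict route
    @bentoml.api(route="/predict")
    def txt2img(self, prompt: str = "a cat") -> dict:
        ...
```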

NOTE - To allow the model deployment service to create objects in your bucket, add the policy:

```
allow any-user to manage objects in compartment <compartment> where ALL { request.principal.type='datasciencemodeldeployment', target.bucket.name='<BUCKET_NAME>' }
```

## Zip the artifact and create a Model catalog entry

```
cd sd3-medium
zip -0 -r artifact.zip *
```

Use this zip to create a simple model catalog entry and fetch the model OCID.

Note - Create a VCN and subnet with internet connectivity so the deployment can fetch the model of your choice, or use the model catalog method to bring the model along with the BentoML files.
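
If you prefer the SDK over the console, creating the catalog entry could look roughly like this sketch with the OCI Python SDK (the client setup, display name, and placeholders are assumptions, not part of this sample):

```
import oci

config = oci.config.from_file()  # assumes a configured ~/.oci/config
ds = oci.data_science.DataScienceClient(config)

model = ds.create_model(
    create_model_details=oci.data_science.models.CreateModelDetails(
        compartment_id="<COMPARTMENT_ID>",
        project_id="<PROJECT_ID>",
        display_name="sd3-medium-bentoml",
    )
).data

# Upload the zipped BentoML artifact created above
with open("artifact.zip", "rb") as f:
    ds.create_model_artifact(
        model.id,
        f,
        content_disposition="attachment; filename=artifact.zip",
    )

print("Model OCID:", model.id)
```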

## Create Model deployment

Create the model deployment using [model-deployment.py](./model-deployment.py) as a reference.

## Prediction

Once the model deployment is active, use the request below to generate an image:

```
oci raw-request --http-method POST --target-uri <MODEL_DEPLOYMENT_ENDPOINT> --request-body '{ "prompt": "A cat holding a sign that says hello World", "num_inference_steps": 10,"guidance_scale": 7.0 }' --request-headers '{"Content-Type":"application/json"}'
```

The generated image will be placed in the chosen bucket.
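
The endpoint can also be invoked from Python; here is a minimal sketch using the OCI request signer with `requests` (the endpoint placeholder and auth details are assumptions):

```
import oci
import requests

config = oci.config.from_file()
auth = oci.signer.Signer(
    tenancy=config["tenancy"],
    user=config["user"],
    fingerprint=config["fingerprint"],
    private_key_file_location=config["key_file"],
)

body = {
    "prompt": "A cat holding a sign that says hello World",
    "num_inference_steps": 10,
    "guidance_scale": 7.0,
}
response = requests.post("<MODEL_DEPLOYMENT_ENDPOINT>", json=body, auth=auth)
print(response.status_code)
```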
Lines changed: 103 additions & 0 deletions (model-deployment.py)
import oci
import os
import logging

def get_auth():
    """Build a session-token (security token) signer from the local OCI config."""
    PROFILE_NAME = 'DEFAULT'
    SECURITY_TOKEN_FILE_KEY = 'security_token_file'
    KEY_FILE_KEY = 'key_file'
    config = oci.config.from_file(profile_name=PROFILE_NAME)
    token_file = config[SECURITY_TOKEN_FILE_KEY]
    token = None
    with open(token_file, 'r') as f:
        token = f.read()
    private_key = oci.signer.load_private_key_from_file(config[KEY_FILE_KEY])
    signer = oci.auth.signers.SecurityTokenSigner(token, private_key)
    return signer

def get_datascience_client():
    logger.info("Creating session-token authenticated data science client")
    return oci.data_science.DataScienceClient({}, signer=get_auth(), service_endpoint=service_endpoint)

# Set up logging
_logger_name = 'MD'
logger = logging.getLogger(_logger_name)
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', '%Y-%m-%d %H:%M:%S')
ch.setFormatter(formatter)
logger.addHandler(ch)


service_endpoint = "https://datascience.us-ashburn-1.oci.oraclecloud.com"

compartment_id = os.getenv('COMPARTMENT_ID', None)
project_id = os.getenv('PROJECT_ID', None)

logger.info("Setting up data-science client")
data_science_client = get_datascience_client()
logger.info("Data-science client set up successfully")

create_md_response = data_science_client.create_model_deployment(create_model_deployment_details={
    "displayName": "Diffusion Model deployment",
    "projectId": project_id,
    "compartmentId": compartment_id,
    "modelDeploymentConfigurationDetails": {
        "deploymentType": "SINGLE_MODEL",
        "modelConfigurationDetails": {
            "modelId": "<MODEL_ID>",  # OCID of the model catalog entry created earlier
            "instanceConfiguration": {
                "instanceShapeName": "VM.GPU.A10.2",
                "modelDeploymentInstanceShapeConfigDetails": None,
                "subnetId": "<SUBNET_ID>",  # subnet with internet connectivity
                "privateEndpointId": None
            },
            "scalingPolicy": {
                "policyType": "FIXED_SIZE",
                "instanceCount": 1
            },
            "bandwidthMbps": 10,
            "maximumBandwidthMbps": 10
        },
        "streamConfigurationDetails": {
            "inputStreamIds": None,
            "outputStreamIds": None
        },
        "environmentConfigurationDetails": {
            "environmentConfigurationType": "OCIR_CONTAINER",
            "image": "<IMAGE_ID>",      # OCIR image built from the Dockerfile
            "imageDigest": "<DIGEST>",
            "cmd": None,
            "entrypoint": None,
            "serverPort": 3000,         # default BentoML serving port
            "healthCheckPort": 3000,
            "environmentVariables": {
                "MODEL_DEPLOY_HEALTH_ENDPOINT": "/readyz",
                "SHM_SIZE": "10g",
                "HF_TOKEN": "<HF_TOKEN_FOR_MODEL_DOWNLOAD>"  # No need if using cataloged model
            }
        }
    },
    "categoryLogDetails": {
        "access": {
            "logId": "<LOG_ID>",
            "logGroupId": "<LOG_GROUP_ID>"
        },
        "predict": {
            "logId": "<LOG_ID>",
            "logGroupId": "<LOG_GROUP_ID>"
        }
    },
    "freeformTags": {},
    "definedTags": {}
})
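
The create call returns as soon as the request is accepted; a hedged follow-up sketch for waiting until the deployment becomes active, reusing the `create_md_response`, `data_science_client`, and `logger` names from the script above:

```
md_id = create_md_response.data.id
oci.wait_until(
    data_science_client,
    data_science_client.get_model_deployment(md_id),
    "lifecycle_state",
    "ACTIVE",
    max_wait_seconds=1800,
)
logger.info("Model deployment %s is ACTIVE", md_id)
```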
Lines changed: 64 additions & 0 deletions (BentoML service in sd3-medium)
import typing as t
import bentoml
from PIL.Image import Image
from annotated_types import Le, Ge
from typing_extensions import Annotated
import oci
import io
import os


MODEL_ID = "stabilityai/stable-diffusion-3-medium-diffusers"

sample_prompt = "A cat holding a sign that says hello world"

def get_oss_client():
    print("Getting Resource Principal authenticated ObjectStorage client")
    return oci.object_storage.ObjectStorageClient({}, signer=oci.auth.signers.get_resource_principals_signer(), service_endpoint="https://objectstorage.us-ashburn-1.oraclecloud.com")

object_storage_client = get_oss_client()

@bentoml.service(
    traffic={"timeout": 300},
    workers=1,
    resources={
        "gpu": 1,
        "gpu_type": "nvidia-l4",
    },
)
class SD3Medium:
    def __init__(self) -> None:
        import torch
        from diffusers import StableDiffusion3Pipeline

        # Load the SD3 pipeline once at startup and move it to the GPU
        self.pipe = StableDiffusion3Pipeline.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
        )
        self.pipe.to(device="cuda")

    # Readiness probe targeted by MODEL_DEPLOY_HEALTH_ENDPOINT (/readyz)
    def __is_ready__(self) -> bool:
        return True

    @bentoml.api(route="/predict")
    def txt2img(
        self,
        prompt: str = sample_prompt,
        negative_prompt: t.Optional[str] = None,
        num_inference_steps: Annotated[int, Ge(1), Le(50)] = 28,
        guidance_scale: float = 7.0,
    ) -> t.Dict[str, str]:
        image: Image = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
        ).images[0]
        # Upload the generated image to Object Storage instead of returning it
        namespace = os.getenv("NAMESPACE")
        bucketName = os.getenv("BUCKET_NAME")
        objectName = "image.png"
        in_mem_file = io.BytesIO()
        image.save(in_mem_file, "png")
        in_mem_file.seek(0)
        put_object_response = object_storage_client.put_object(namespace, bucketName, objectName, in_mem_file)
        return dict(put_object_response.headers)
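
To sanity-check the service before containerizing, a hypothetical local call might look like this (assumes `bentoml serve` is running on the default port 3000; note that the resource principal and Object Storage calls only resolve inside OCI):

```
import bentoml

with bentoml.SyncHTTPClient("http://localhost:3000") as client:
    headers = client.txt2img(
        prompt="A cat holding a sign that says hello world",
        num_inference_steps=10,
        guidance_scale=7.0,
    )
    print(headers)
```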
