Skip to content

Commit

Permalink
Add basic preflight checks as the first component in the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
JudeNiroshan committed Sep 12, 2024
1 parent 06b0ce2 commit 19bfb68
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 32 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,12 @@ To deploy a signed certificate in cluster follow [trusted cluster cert](signed-c
### Object Storage
This solution requires object storage to be in place either through S3 or using Noobaa.

If you are using Noobaa, apply the following [tuning parameters](noobaa/README.md)
If you are using Noobaa, apply the following [tuning parameters](noobaa/README.md)

## How to run 🏃🏼

1. Create K8s config map and K8s secret based on the target Model Server Info. Use [kfp-model-server.yaml](./sdg/kfp-model-server.yaml).

2. Use the pipeline.py file to generate pipeline.yaml, which is used to create the RHOAI pipeline.

3. Create a run in RHOAI by providing required input parameter values.
12 changes: 9 additions & 3 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
def pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]):
"""Wrapper for KFP pipeline, which allows for mocking individual stages."""
if 'sdg' in mock:
from sdg.faked import git_clone_op, sdg_op
from sdg.faked import preflight_check_op, git_clone_op, sdg_op
else:
from sdg import git_clone_op, sdg_op
from sdg import preflight_check_op, git_clone_op, sdg_op


@dsl.pipeline(
Expand All @@ -27,9 +27,15 @@ def pipeline(
repo_branch: Optional[str] = None,
repo_pr: Optional[int] = None,
):
preflight_check_task = preflight_check_op(
repo_branch=repo_branch, repo_pr=repo_pr
)
use_config_map_as_env(preflight_check_task, K8S_NAME, dict(endpoint="endpoint", model="model"))
use_secret_as_env(preflight_check_task, K8S_NAME, {"api_key": "api_key"})

git_clone_task = git_clone_op(
repo_branch=repo_branch, repo_pr=repo_pr, repo_url=repo_url
)
).after(preflight_check_task)

sdg_task = sdg_op(
num_instructions_to_generate=num_instructions_to_generate,
Expand Down
121 changes: 97 additions & 24 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ components:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
comp-preflight-check-op:
executorLabel: exec-preflight-check-op
inputDefinitions:
parameters:
repo_branch:
parameterType: STRING
repo_pr:
parameterType: NUMBER_INTEGER
comp-sdg-op:
executorLabel: exec-sdg-op
inputDefinitions:
Expand All @@ -49,23 +57,40 @@ deploymentSpec:
exec-git-clone-op:
container:
args:
- 'git clone {{$.inputs.parameters[''repo_url'']}} {{$.outputs.artifacts[''taxonomy''].path}}
&& cd {{$.outputs.artifacts[''taxonomy''].path}} && if [ ! -z "{{$.inputs.parameters[''repo_branch'']}}"
]; then git fetch origin {{$.inputs.parameters[''repo_branch'']}} && git
checkout {{$.inputs.parameters[''repo_branch'']}}; elif [ ! -z "{{$.inputs.parameters[''repo_pr'']}}"
]; then git fetch origin pull/{{$.inputs.parameters[''repo_pr'']}}/head:{{$.inputs.parameters[''repo_pr'']}}
&& git checkout {{$.inputs.parameters[''repo_pr'']}}; fi '
- --executor_input
- '{{$}}'
- --function_to_execute
- git_clone_op
command:
- /bin/sh
- sh
- -c
image: registry.access.redhat.com/ubi9/toolbox
exec-sdg-op:
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.8.0'\
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef git_clone_op(\n taxonomy: dsl.Output[dsl.Dataset],\n repo_branch:\
\ str,\n repo_pr: Optional[int],\n repo_url: Optional[str],\n):\n\
\ return\n\n"
image: registry.access.redhat.com/ubi9/python-311:latest
exec-preflight-check-op:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- sdg_op
- preflight_check_op
command:
- sh
- -c
Expand All @@ -79,6 +104,36 @@ deploymentSpec:
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef preflight_check_op(\n repo_branch: str,\n repo_pr: Optional[int],\n\
):\n pass\n\n"
image: registry.access.redhat.com/ubi9/python-311:latest
exec-sdg-op:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- sdg_op
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.8.0'\
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\
\ python3 -m pip install --quiet --no-warn-script-location 'git+https://github.com/redhat-et/ilab-on-ocp.git#subdirectory=sdg/faked/fixtures'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
Expand All @@ -87,20 +142,10 @@ deploymentSpec:
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef sdg_op(\n num_instructions_to_generate: int,\n taxonomy:\
\ dsl.Input[dsl.Dataset],\n sdg: dsl.Output[dsl.Dataset],\n repo_branch:\
\ Optional[str],\n repo_pr: Optional[int],\n):\n import openai\n \
\ from instructlab.sdg import generate_data\n from instructlab.sdg.utils.taxonomy\
\ import read_taxonomy\n from os import getenv\n\n api_key = getenv(\"\
api_key\")\n model = getenv(\"model\")\n endpoint = getenv(\"endpoint\"\
)\n client = openai.OpenAI(base_url=endpoint, api_key=api_key)\n\n \
\ taxonomy_base = \"main\" if repo_branch or repo_pr else \"empty\"\n\n\
\ print(\"Generating syntetic dataset for:\")\n print()\n print(read_taxonomy(taxonomy.path,\
\ taxonomy_base))\n\n # generate_data has a magic word for its taxonomy_base\
\ argument - `empty`\n # it allows generating from the whole repo, see:\n\
\ # https://github.com/instructlab/sdg/blob/c6a9e74a1618b1077cd38e713b8aaed8b7c0c8ce/src/instructlab/sdg/utils/taxonomy.py#L230\n\
\ generate_data(\n client=client,\n num_instructions_to_generate=num_instructions_to_generate,\n\
\ output_dir=sdg.path,\n taxonomy=taxonomy.path,\n \
\ taxonomy_base=taxonomy_base,\n model_name=model,\n )\n\n"
image: quay.io/tcoufal/ilab-sdg:latest
\ Optional[str],\n repo_pr: Optional[int],\n):\n import sys\n from\
\ pathlib import Path\n import shutil\n\n shutil.copytree(Path(sys.prefix)\
\ / \"sdg_fixtures\", sdg.path, dirs_exist_ok=True)\n return\n\n"
image: registry.access.redhat.com/ubi9/python-311:latest
pipelineInfo:
description: InstructLab pipeline
displayName: InstructLab
Expand All @@ -113,6 +158,8 @@ root:
enableCache: true
componentRef:
name: comp-git-clone-op
dependentTasks:
- preflight-check-op
inputs:
parameters:
repo_branch:
Expand All @@ -123,6 +170,19 @@ root:
componentInputParameter: repo_url
taskInfo:
name: git-clone-op
preflight-check-op:
cachingOptions:
enableCache: true
componentRef:
name: comp-preflight-check-op
inputs:
parameters:
repo_branch:
componentInputParameter: repo_branch
repo_pr:
componentInputParameter: repo_pr
taskInfo:
name: preflight-check-op
sdg-op:
cachingOptions:
enableCache: true
Expand Down Expand Up @@ -168,6 +228,19 @@ platforms:
kubernetes:
deploymentSpec:
executors:
exec-preflight-check-op:
configMapAsEnv:
- configMapName: kfp-model-server
keyToEnv:
- configMapKey: endpoint
envVar: endpoint
- configMapKey: model
envVar: model
secretAsEnv:
- keyToEnv:
- envVar: api_key
secretKey: api_key
secretName: kfp-model-server
exec-sdg-op:
configMapAsEnv:
- configMapName: kfp-model-server
Expand Down
4 changes: 2 additions & 2 deletions sdg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .components import git_clone_op, sdg_op
from .components import preflight_check_op, git_clone_op, sdg_op
from . import faked

__all__ = ["git_clone_op", "sdg_op", "faked"]
__all__ = ["preflight_check_op", "git_clone_op", "sdg_op", "faked"]
20 changes: 20 additions & 0 deletions sdg/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@

IMAGE = "quay.io/tcoufal/ilab-sdg:latest"

@dsl.component(base_image=IMAGE)
def preflight_check_op(
    repo_branch: str,
    repo_pr: Optional[int],
):
    """Fail fast when the run inputs or model-server settings are unusable.

    Args:
        repo_branch: Taxonomy repository branch; may be empty when a pull
            request number is supplied instead.
        repo_pr: Taxonomy pull request number; only a positive value counts
            as supplied.

    Raises:
        Exception: If neither a branch nor a positive pull request number is
            given, or if any of the ``api_key``, ``model`` or ``endpoint``
            environment variables is unset or empty.
    """
    import os

    # At least one taxonomy source (branch or pull request) must be named.
    if not repo_branch:
        if repo_pr is None or repo_pr <= 0:
            raise Exception("Both taxonomy repo branch and taxonomy pull request number cannot be empty")

    # Checked in this order so the first missing setting is the one reported.
    required_settings = (
        ("api_key", "Model Server Auth Key is missing in kfp-model-server secret"),
        ("model", "Model name is missing in kfp-model-server configMap"),
        ("endpoint", "Model Server endpoint URL is missing in kfp-model-server configMap"),
    )
    for env_name, failure_message in required_settings:
        if not os.getenv(env_name):
            raise Exception(failure_message)

@dsl.container_component
def git_clone_op(
taxonomy: dsl.Output[dsl.Dataset],
Expand Down
4 changes: 2 additions & 2 deletions sdg/faked/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .components import git_clone_op, sdg_op
from .components import preflight_check_op, git_clone_op, sdg_op

__all__ = ["git_clone_op", "sdg_op"]
__all__ = ["preflight_check_op", "git_clone_op", "sdg_op"]
7 changes: 7 additions & 0 deletions sdg/faked/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@

IMAGE = "registry.access.redhat.com/ubi9/python-311:latest"

@dsl.component(base_image=IMAGE)
def preflight_check_op(
    repo_branch: str,
    repo_pr: Optional[int],
):
    """Mocked preflight check: accepts the same inputs and does nothing.

    Args:
        repo_branch: Taxonomy repository branch (unused).
        repo_pr: Taxonomy pull request number (unused).
    """
    return

@dsl.component(base_image=IMAGE)
def git_clone_op(
taxonomy: dsl.Output[dsl.Dataset],
Expand Down

0 comments on commit 19bfb68

Please sign in to comment.