From 11e1fd8d36b7500d9b2e5c352135008efa26a02e Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Thu, 21 Nov 2024 10:09:59 -0500 Subject: [PATCH] wip: implementing conditional pipeline logic Signed-off-by: Michael Clifford --- pipeline.py | 712 ++++---- pipeline.yaml | 4266 +++++++++++++++++++++++++++++++++++---------- sdg/__init__.py | 2 + sdg/components.py | 7 + 4 files changed, 3799 insertions(+), 1188 deletions(-) diff --git a/pipeline.py b/pipeline.py index cd9fa1e8..cc2bff4a 100644 --- a/pipeline.py +++ b/pipeline.py @@ -64,6 +64,7 @@ def pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]): ) else: from sdg import ( + get_training_data, git_clone_op, sdg_op, sdg_to_artifact_op, @@ -102,6 +103,8 @@ def pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]): # Imports for evaluation from eval.final import run_final_eval_op + + ## from eval.mmlu import run_mmlu_op, load_mmlu_results_op from eval.mt_bench import run_mt_bench_op from utils import list_models_in_directory_op @@ -112,6 +115,7 @@ def pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]): ) def pipeline( # SDG phase + sdg_only: bool = False, sdg_repo_url: str = "https://github.com/instructlab/taxonomy.git", sdg_repo_branch: Optional[str] = None, sdg_repo_pr: Optional[int] = None, @@ -120,6 +124,7 @@ def pipeline( sdg_pipeline: str = SDG_PIPELINE, sdg_max_batch_len: int = MAX_BATCH_LEN, # Training phase + train_only: bool = False, train_nproc_per_node: int = 3, train_nnodes: int = 2, train_num_epochs_phase_1: int = NUM_EPOCHS_PHASE_1, @@ -180,318 +185,397 @@ def pipeline( k8s_storage_class_name: A Kubernetes StorageClass name for persistent volumes. Selected StorageClass must support RWX PersistentVolumes. """ - # SDG stage - sdg_input_pvc_task = CreatePVC( - pvc_name_suffix="-sdg", - access_modes=["ReadWriteMany"], - size="10Gi", - storage_class_name=k8s_storage_class_name, - ) - git_clone_task = git_clone_op( - repo_branch=sdg_repo_branch, - repo_pr=sdg_repo_pr if sdg_repo_pr and sdg_repo_pr > 0 else None, - repo_url=sdg_repo_url, - ) - mount_pvc( - task=git_clone_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - git_clone_task.set_caching_options(False) - - sdg_task = sdg_op( - num_instructions_to_generate=sdg_scale_factor, - pipeline=sdg_pipeline, - repo_branch=sdg_repo_branch, - repo_pr=sdg_repo_pr, - ) - sdg_task.set_env_variable("HOME", "/tmp") - sdg_task.set_env_variable("HF_HOME", "/tmp") - use_config_map_as_env( - sdg_task, TEACHER_CONFIG_MAP, dict(endpoint="endpoint", model="model") - ) - use_secret_as_env(sdg_task, TEACHER_SECRET, {"api_key": "api_key"}) - sdg_task.after(git_clone_task) - mount_pvc( - task=sdg_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - sdg_task.set_caching_options(False) - - # Upload "sdg" and "taxonomy" artifacts to S3 without blocking the rest of the workflow - taxonomy_to_artifact_task = taxonomy_to_artifact_op() - taxonomy_to_artifact_task.after(git_clone_task, sdg_task) - mount_pvc( - task=taxonomy_to_artifact_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - sdg_to_artifact_task = sdg_to_artifact_op() - sdg_to_artifact_task.after(git_clone_task, sdg_task) - mount_pvc( - task=sdg_to_artifact_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - - set_image_pull_secrets(sdg_task, [IMAGE_PULL_SECRET]) - - # uncomment if updating image with same tag - # set_image_pull_policy(sdg_task, "Always") - - # Training stage - - # We need to pass storage_class_name as "" to use the default StorageClass, if left 
empty, KFP uses "standard" StorageClass. - # 'standard' != default StorageClass - # https://github.com/kubeflow/pipelines/blob/1cded35cf5e93d8c8d32fefbddceb2eed8de9a0a/backend/src/v2/driver/driver.go#L1428-L1436 - # At least we made it a pipeline parameter - model_pvc_task = CreatePVC( - pvc_name_suffix="-model-cache", - access_modes=["ReadWriteMany"], - size="100Gi", - storage_class_name=k8s_storage_class_name, - ) - model_to_pvc_task = huggingface_importer_op(repo_name=sdg_base_model) - model_to_pvc_task.set_caching_options(False) - mount_pvc( - task=model_to_pvc_task, pvc_name=model_pvc_task.output, mount_path="/model" - ) - - # Data processing - data_processing_task = data_processing_op(max_batch_len=sdg_max_batch_len) - mount_pvc( - task=data_processing_task, - pvc_name=model_pvc_task.output, - mount_path="/model", - ) - mount_pvc( - task=data_processing_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - data_processing_task.after(model_to_pvc_task, sdg_task) - data_processing_task.set_caching_options(False) - - set_image_pull_secrets(data_processing_task, [IMAGE_PULL_SECRET]) - - # Upload "skills_processed_data" and "knowledge_processed_data" artifacts to S3 without blocking the rest of the workflow - skills_processed_data_to_artifact_task = skills_processed_data_to_artifact_op() - skills_processed_data_to_artifact_task.after(data_processing_task) - mount_pvc( - task=skills_processed_data_to_artifact_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - skills_processed_data_to_artifact_task.set_caching_options(False) - knowledge_processed_data_to_artifact_task = ( - knowledge_processed_data_to_artifact_op() - ) - knowledge_processed_data_to_artifact_task.after(data_processing_task) - mount_pvc( - task=knowledge_processed_data_to_artifact_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/data", - ) - knowledge_processed_data_to_artifact_task.set_caching_options(False) - - output_pvc_task = CreatePVC( - pvc_name_suffix="-output", - access_modes=["ReadWriteMany"], - size="100Gi", - storage_class_name=k8s_storage_class_name, - ) - - # Using pvc_create_task.output as PyTorchJob name since dsl.PIPELINE_* global variables do not template/work in KFP v2 - # https://github.com/kubeflow/pipelines/issues/10453 - pytorchjob_manifest_task = pytorchjob_manifest_op( - model_pvc_name=model_pvc_task.output, - input_pvc_name=sdg_input_pvc_task.output, - name_suffix=sdg_input_pvc_task.output, - output_pvc_name=output_pvc_task.output, - phase_num=1, - nproc_per_node=train_nproc_per_node, - nnodes=train_nnodes, - num_epochs=train_num_epochs_phase_1, - effective_batch_size=train_effective_batch_size_phase_1, - learning_rate=train_learning_rate_phase_1, - num_warmup_steps=train_num_warmup_steps_phase_1, - save_samples=train_save_samples, - max_batch_len=train_max_batch_len, - seed=train_seed, - ) - pytorchjob_manifest_task.set_caching_options(False) - - kubectl_apply_task = kubectl_apply_op( - manifest=pytorchjob_manifest_task.outputs["manifest"] - ) - kubectl_apply_task.after(data_processing_task, model_to_pvc_task) - kubectl_apply_task.set_caching_options(False) - - kubectl_wait_task = kubectl_wait_for_op( - condition="condition=Succeeded", - kind="pytorchjobs", - name=pytorchjob_manifest_task.outputs["name"], - ) - kubectl_wait_task.after(kubectl_apply_task) - kubectl_wait_task.set_caching_options(False) - - #### Train 2 - - pytorchjob_manifest_2_task = pytorchjob_manifest_op( - model_pvc_name=model_pvc_task.output, - 
input_pvc_name=sdg_input_pvc_task.output, - name_suffix=sdg_input_pvc_task.output, - output_pvc_name=output_pvc_task.output, - phase_num=2, - nproc_per_node=train_nproc_per_node, - nnodes=train_nnodes, - num_epochs=train_num_epochs_phase_2, - effective_batch_size=train_effective_batch_size_phase_2, - learning_rate=train_learning_rate_phase_2, - num_warmup_steps=train_num_warmup_steps_phase_2, - save_samples=train_save_samples, - max_batch_len=train_max_batch_len, - seed=train_seed, - ) - - pytorchjob_manifest_2_task.set_caching_options(False) - pytorchjob_manifest_2_task.after(kubectl_wait_task) - - mount_pvc( - task=pytorchjob_manifest_2_task, - pvc_name=output_pvc_task.output, - mount_path="/output", - ) - - kubectl_apply_2_task = kubectl_apply_op( - manifest=pytorchjob_manifest_2_task.outputs["manifest"] - ) - kubectl_apply_2_task.set_caching_options(False) - - kubectl_wait_2_task = kubectl_wait_for_op( - condition="condition=Succeeded", - kind="pytorchjobs", - name=pytorchjob_manifest_2_task.outputs["name"], - ) - kubectl_wait_2_task.after(kubectl_apply_2_task) - kubectl_wait_2_task.set_caching_options(False) - - models_list_2_task = list_models_in_directory_op( - models_folder="/output/phase_2/model/hf_format", - ) - models_list_2_task.set_caching_options(False) - models_list_2_task.after(kubectl_wait_2_task) - mount_pvc( - task=models_list_2_task, - pvc_name=output_pvc_task.output, - mount_path="/output", - ) - - # MT_Bench Evaluation of models - - run_mt_bench_task = run_mt_bench_op( - models_list=models_list_2_task.output, - models_path_prefix="/output/phase_2/model/hf_format", - max_workers=mt_bench_max_workers, - merge_system_user_message=mt_bench_merge_system_user_message, - ) - mount_pvc( - task=run_mt_bench_task, - pvc_name=output_pvc_task.output, - mount_path="/output", - ) - run_mt_bench_task.set_env_variable("HOME", "/tmp") - run_mt_bench_task.set_env_variable("HF_HOME", "/tmp") - run_mt_bench_task.set_accelerator_type("nvidia.com/gpu") - run_mt_bench_task.set_accelerator_limit(1) - run_mt_bench_task.set_caching_options(False) - use_config_map_as_env( - run_mt_bench_task, - JUDGE_CONFIG_MAP, - dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"), - ) - set_image_pull_secrets(run_mt_bench_task, [IMAGE_PULL_SECRET]) - use_secret_as_env(run_mt_bench_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"}) - - # uncomment if updating image with same tag - # set_image_pull_policy(run_mt_bench_task, "Always") - - final_eval_task = run_final_eval_op( - candidate_model="/output/phase_2/model/hf_format/candidate_model", - # TODO: DO we need both candidate_branch and base_branch - base_branch=sdg_repo_branch, - candidate_branch=sdg_repo_branch, - base_model_dir="/model/", - max_workers=final_eval_max_workers, - merge_system_user_message=final_eval_merge_system_user_message, - few_shots=final_eval_few_shots, - batch_size=final_eval_batch_size, - ) - mount_pvc( - task=final_eval_task, pvc_name=output_pvc_task.output, mount_path="/output" - ) - mount_pvc( - task=final_eval_task, - pvc_name=sdg_input_pvc_task.output, - mount_path="/input", - ) - mount_pvc( - task=final_eval_task, - pvc_name=model_pvc_task.output, - mount_path="/model", - ) - - use_config_map_as_env( - final_eval_task, - JUDGE_CONFIG_MAP, - dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"), - ) - - final_eval_task.set_env_variable("HOME", "/tmp") - final_eval_task.set_env_variable("HF_HOME", "/tmp") - set_image_pull_secrets(final_eval_task, [IMAGE_PULL_SECRET]) - - # uncomment if updating image with same tag - # 
set_image_pull_policy(final_eval_task, "Always")
-
-    use_secret_as_env(final_eval_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})
-
-    final_eval_task.after(run_mt_bench_task)
-    final_eval_task.set_accelerator_type("nvidia.com/gpu")
-    final_eval_task.set_accelerator_limit(1)
-
-    output_model_task = pvc_to_model_op(
-        pvc_path="/output/phase_2/model/hf_format/candidate_model",
-    )
-    output_model_task.after(run_mt_bench_task)
-    output_model_task.set_caching_options(False)
-    mount_pvc(
-        task=output_model_task,
-        pvc_name=output_pvc_task.output,
-        mount_path="/output",
-    )
-
-    output_mt_bench_task = pvc_to_mt_bench_op(
-        pvc_path="/output/mt_bench_data.json",
-    )
-    output_mt_bench_task.after(run_mt_bench_task)
-    mount_pvc(
-        task=output_mt_bench_task,
-        pvc_name=output_pvc_task.output,
-        mount_path="/output",
-    )
-
-    output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output)
-    output_pvc_delete_task.after(
-        output_model_task, output_mt_bench_task, final_eval_task
-    )
-
-    sdg_pvc_delete_task = DeletePVC(pvc_name=sdg_input_pvc_task.output)
-    sdg_pvc_delete_task.after(final_eval_task)
-
-    model_pvc_delete_task = DeletePVC(pvc_name=model_pvc_task.output)
-    model_pvc_delete_task.after(final_eval_task)
+    def create_pvcs():
+        sdg_input_pvc_task = CreatePVC(
+            pvc_name_suffix="-sdg",
+            access_modes=["ReadWriteMany"],
+            size="10Gi",
+            storage_class_name=k8s_storage_class_name,
+        )
+
+        model_pvc_task = CreatePVC(
+            pvc_name_suffix="-model-cache",
+            access_modes=["ReadWriteMany"],
+            size="100Gi",
+            storage_class_name=k8s_storage_class_name,
+        )
+
+        output_pvc_task = CreatePVC(
+            pvc_name_suffix="-output",
+            access_modes=["ReadWriteMany"],
+            size="100Gi",
+            storage_class_name=k8s_storage_class_name,
+        )
+
+        return model_pvc_task, sdg_input_pvc_task, output_pvc_task
+
+    def delete_pvcs(model_pvc_task, sdg_input_pvc_task, output_pvc_task, after):
+        output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output)
+        output_pvc_delete_task.after(after)
+        sdg_pvc_delete_task = DeletePVC(pvc_name=sdg_input_pvc_task.output)
+        sdg_pvc_delete_task.after(after)
+        model_pvc_delete_task = DeletePVC(pvc_name=model_pvc_task.output)
+        model_pvc_delete_task.after(after)
+
+    def sdg_stage(
+        sdg_input_pvc,
+    ):
+        # SDG stage
+
+        git_clone_task = git_clone_op(
+            repo_branch=sdg_repo_branch,
+            repo_pr=sdg_repo_pr if sdg_repo_pr and sdg_repo_pr > 0 else None,
+            repo_url=sdg_repo_url,
+        )
+        mount_pvc(
+            task=git_clone_task,
+            pvc_name=sdg_input_pvc.output,
+            mount_path="/data",
+        )
+        git_clone_task.set_caching_options(False)
+
+        sdg_task = sdg_op(
+            num_instructions_to_generate=sdg_scale_factor,
+            pipeline=sdg_pipeline,
+            repo_branch=sdg_repo_branch,
+            repo_pr=sdg_repo_pr,
+        )
+        sdg_task.set_env_variable("HOME", "/tmp")
+        sdg_task.set_env_variable("HF_HOME", "/tmp")
+        use_config_map_as_env(
+            sdg_task, TEACHER_CONFIG_MAP, dict(endpoint="endpoint", model="model")
+        )
+        use_secret_as_env(sdg_task, TEACHER_SECRET, {"api_key": "api_key"})
+        sdg_task.after(git_clone_task)
+        mount_pvc(
+            task=sdg_task,
+            pvc_name=sdg_input_pvc.output,
+            mount_path="/data",
+        )
+        sdg_task.set_caching_options(False)
+
+        # Upload "sdg" and "taxonomy" artifacts to S3 without blocking the rest of the workflow
+        taxonomy_to_artifact_task = taxonomy_to_artifact_op()
+        taxonomy_to_artifact_task.after(git_clone_task, sdg_task)
+        mount_pvc(
+            task=taxonomy_to_artifact_task,
+            pvc_name=sdg_input_pvc.output,
+            mount_path="/data",
+        )
+        sdg_to_artifact_task = sdg_to_artifact_op()
+        sdg_to_artifact_task.after(git_clone_task, sdg_task)
+        mount_pvc(
task=sdg_to_artifact_task,
+            pvc_name=sdg_input_pvc.output,
+            mount_path="/data",
+        )
+
+        set_image_pull_secrets(sdg_task, [IMAGE_PULL_SECRET])
+        # uncomment if updating image with same tag
+        # set_image_pull_policy(sdg_task, "Always")
+
+        return sdg_task
+
+    def train_stage():
+        # Training stage
+
+        # We need to pass storage_class_name as "" to use the default StorageClass; if left empty, KFP uses the "standard" StorageClass.
+        # 'standard' != default StorageClass
+        # https://github.com/kubeflow/pipelines/blob/1cded35cf5e93d8c8d32fefbddceb2eed8de9a0a/backend/src/v2/driver/driver.go#L1428-L1436
+        # At least we made it a pipeline parameter
+
+        model_to_pvc_task = huggingface_importer_op(repo_name=sdg_base_model)
+        model_to_pvc_task.set_caching_options(False)
+        mount_pvc(
+            task=model_to_pvc_task,
+            pvc_name=model_pvc_task.output,
+            mount_path="/model",
+        )
+
+        # Data processing
+        data_processing_task = data_processing_op(max_batch_len=sdg_max_batch_len)
+
+        mount_pvc(
+            task=data_processing_task,
+            pvc_name=model_pvc_task.output,
+            mount_path="/model",
+        )
+        mount_pvc(
+            task=data_processing_task,
+            pvc_name=sdg_input_pvc_task.output,
+            mount_path="/data",
+        )
+        data_processing_task.after(model_to_pvc_task, sdg_task)
+        data_processing_task.set_caching_options(False)
+
+        set_image_pull_secrets(data_processing_task, [IMAGE_PULL_SECRET])
+
+        # Upload "skills_processed_data" and "knowledge_processed_data" artifacts to S3 without blocking the rest of the workflow
+        skills_processed_data_to_artifact_task = (
+            skills_processed_data_to_artifact_op()
+        )
+        skills_processed_data_to_artifact_task.after(data_processing_task)
+        mount_pvc(
+            task=skills_processed_data_to_artifact_task,
+            pvc_name=sdg_input_pvc_task.output,
+            mount_path="/data",
+        )
+        skills_processed_data_to_artifact_task.set_caching_options(False)
+        knowledge_processed_data_to_artifact_task = (
+            knowledge_processed_data_to_artifact_op()
+        )
+        knowledge_processed_data_to_artifact_task.after(data_processing_task)
+        mount_pvc(
+            task=knowledge_processed_data_to_artifact_task,
+            pvc_name=sdg_input_pvc_task.output,
+            mount_path="/data",
+        )
+        knowledge_processed_data_to_artifact_task.set_caching_options(False)
+
+        # The output PVC is created up front in create_pvcs()
+
+        # Using sdg_input_pvc_task.output as the PyTorchJob name suffix since dsl.PIPELINE_* global variables do not template/work in KFP v2
+        # https://github.com/kubeflow/pipelines/issues/10453
+        pytorchjob_manifest_task = pytorchjob_manifest_op(
+            model_pvc_name=model_pvc_task.output,
+            input_pvc_name=sdg_input_pvc_task.output,
+            name_suffix=sdg_input_pvc_task.output,
+            output_pvc_name=output_pvc_task.output,
+            phase_num=1,
+            nproc_per_node=train_nproc_per_node,
+            nnodes=train_nnodes,
+            num_epochs=train_num_epochs_phase_1,
+            effective_batch_size=train_effective_batch_size_phase_1,
+            learning_rate=train_learning_rate_phase_1,
+            num_warmup_steps=train_num_warmup_steps_phase_1,
+            save_samples=train_save_samples,
+            max_batch_len=train_max_batch_len,
+            seed=train_seed,
+        )
+        pytorchjob_manifest_task.set_caching_options(False)
+
+        kubectl_apply_task = kubectl_apply_op(
+            manifest=pytorchjob_manifest_task.outputs["manifest"]
+        )
+        kubectl_apply_task.after(data_processing_task, model_to_pvc_task)
+        kubectl_apply_task.set_caching_options(False)
+
+        kubectl_wait_task = kubectl_wait_for_op(
+            condition="condition=Succeeded",
+            kind="pytorchjobs",
+            name=pytorchjob_manifest_task.outputs["name"],
+        )
+        kubectl_wait_task.after(kubectl_apply_task)
+        kubectl_wait_task.set_caching_options(False)
+
+        #### Train 2
+
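+        # Phase 2 reuses pytorchjob_manifest_op with phase_num=2 and the
+        # phase-2 hyperparameters; it runs after kubectl_wait_task, so the
+        # second PyTorchJob starts only once the phase-1 job has succeeded.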
pytorchjob_manifest_2_task = pytorchjob_manifest_op( + model_pvc_name=model_pvc_task.output, + input_pvc_name=sdg_input_pvc_task.output, + name_suffix=sdg_input_pvc_task.output, + output_pvc_name=output_pvc_task.output, + phase_num=2, + nproc_per_node=train_nproc_per_node, + nnodes=train_nnodes, + num_epochs=train_num_epochs_phase_2, + effective_batch_size=train_effective_batch_size_phase_2, + learning_rate=train_learning_rate_phase_2, + num_warmup_steps=train_num_warmup_steps_phase_2, + save_samples=train_save_samples, + max_batch_len=train_max_batch_len, + seed=train_seed, + ) + + pytorchjob_manifest_2_task.set_caching_options(False) + pytorchjob_manifest_2_task.after(kubectl_wait_task) + + mount_pvc( + task=pytorchjob_manifest_2_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + + kubectl_apply_2_task = kubectl_apply_op( + manifest=pytorchjob_manifest_2_task.outputs["manifest"] + ) + kubectl_apply_2_task.set_caching_options(False) + + kubectl_wait_2_task = kubectl_wait_for_op( + condition="condition=Succeeded", + kind="pytorchjobs", + name=pytorchjob_manifest_2_task.outputs["name"], + ) + kubectl_wait_2_task.after(kubectl_apply_2_task) + kubectl_wait_2_task.set_caching_options(False) + + models_list_2_task = list_models_in_directory_op( + models_folder="/output/phase_2/model/hf_format", + ) + models_list_2_task.set_caching_options(False) + models_list_2_task.after(kubectl_wait_2_task) + mount_pvc( + task=models_list_2_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + + return models_list_2_task + + def mt_bench_stage(): + # MT_Bench Evaluation of models + + run_mt_bench_task = run_mt_bench_op( + models_list=models_list_2_task.output, + models_path_prefix="/output/phase_2/model/hf_format", + max_workers=mt_bench_max_workers, + merge_system_user_message=mt_bench_merge_system_user_message, + ) + mount_pvc( + task=run_mt_bench_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + run_mt_bench_task.set_env_variable("HOME", "/tmp") + run_mt_bench_task.set_env_variable("HF_HOME", "/tmp") + run_mt_bench_task.set_accelerator_type("nvidia.com/gpu") + run_mt_bench_task.set_accelerator_limit(1) + run_mt_bench_task.set_caching_options(False) + use_config_map_as_env( + run_mt_bench_task, + JUDGE_CONFIG_MAP, + dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"), + ) + set_image_pull_secrets(run_mt_bench_task, [IMAGE_PULL_SECRET]) + use_secret_as_env( + run_mt_bench_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"} + ) + + # uncomment if updating image with same tag + # set_image_pull_policy(run_mt_bench_task, "Always") + return run_mt_bench_task + + def final_eval_stage(): + final_eval_task = run_final_eval_op( + candidate_model="/output/phase_2/model/hf_format/candidate_model", + # TODO: DO we need both candidate_branch and base_branch + base_branch=sdg_repo_branch, + candidate_branch=sdg_repo_branch, + base_model_dir="/model/", + max_workers=final_eval_max_workers, + merge_system_user_message=final_eval_merge_system_user_message, + few_shots=final_eval_few_shots, + batch_size=final_eval_batch_size, + ) + mount_pvc( + task=final_eval_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + mount_pvc( + task=final_eval_task, + pvc_name=sdg_input_pvc_task.output, + mount_path="/input", + ) + mount_pvc( + task=final_eval_task, + pvc_name=model_pvc_task.output, + mount_path="/model", + ) + + use_config_map_as_env( + final_eval_task, + JUDGE_CONFIG_MAP, + dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"), + ) + + 
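+        # The judge endpoint and model name come from JUDGE_CONFIG_MAP above;
+        # the API key is injected from JUDGE_SECRET below as JUDGE_API_KEY.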
final_eval_task.set_env_variable("HOME", "/tmp") + final_eval_task.set_env_variable("HF_HOME", "/tmp") + set_image_pull_secrets(final_eval_task, [IMAGE_PULL_SECRET]) + + # uncomment if updating image with same tag + # set_image_pull_policy(final_eval_task, "Always") + + use_secret_as_env( + final_eval_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"} + ) + + final_eval_task.after(run_mt_bench_task) + final_eval_task.set_accelerator_type("nvidia.com/gpu") + final_eval_task.set_accelerator_limit(1) + + return final_eval_task + + def outputs_to_artifacts(): + output_model_task = pvc_to_model_op( + pvc_path="/output/phase_2/model/hf_format/candidate_model", + ) + output_model_task.after(run_mt_bench_task) + output_model_task.set_caching_options(False) + mount_pvc( + task=output_model_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + + output_mt_bench_task = pvc_to_mt_bench_op( + pvc_path="/output/mt_bench_data.json", + ) + output_mt_bench_task.after(run_mt_bench_task) + mount_pvc( + task=output_mt_bench_task, + pvc_name=output_pvc_task.output, + mount_path="/output", + ) + + # Pipelines + + # some pre-filght image that checks all params and fails if something is wrong. + + with dsl.If(sdg_only == True and train_only == True, name="Skip Condition"): + output_pvc_task, sdg_input_pvc_task, model_pvc_task = create_pvcs() + delete_pvcs( + output_pvc_task=output_pvc_task, + sdg_input_pvc_task=sdg_input_pvc_task, + model_pvc_task=model_pvc_task, + after=output_pvc_task, + ) + + with dsl.If(sdg_only == True and train_only == False, name="SDG Only"): + output_pvc_task, sdg_input_pvc_task, model_pvc_task = create_pvcs() + sdg_task = sdg_stage(sdg_input_pvc=sdg_input_pvc_task) + delete_pvcs( + output_pvc_task=output_pvc_task, + sdg_input_pvc_task=sdg_input_pvc_task, + model_pvc_task=model_pvc_task, + after=sdg_task, + ) + + with dsl.If(train_only == True and sdg_only == False, name="Train Only"): + output_pvc_task, sdg_input_pvc_task, model_pvc_task = create_pvcs() + # need a way to insert knowledge and skills data + sdg_task = get_training_data() + train_stage() + delete_pvcs( + output_pvc_task=output_pvc_task, + sdg_input_pvc_task=sdg_input_pvc_task, + model_pvc_task=model_pvc_task, + after=output_pvc_task, + ) + + with dsl.If(sdg_only == False and train_only == False, name="All Stages"): + output_pvc_task, sdg_input_pvc_task, model_pvc_task = create_pvcs() + sdg_task = sdg_stage(sdg_input_pvc=sdg_input_pvc_task) + models_list_2_task = train_stage() + run_mt_bench_task = mt_bench_stage() + final_eval_task = final_eval_stage() + outputs_to_artifacts_task = outputs_to_artifacts() + delete_pvcs( + output_pvc_task=output_pvc_task, + sdg_input_pvc_task=sdg_input_pvc_task, + model_pvc_task=model_pvc_task, + after=final_eval_task, + ) return @@ -554,11 +638,11 @@ def gen_standalone(): # The list of executor names to extract details from to generate the standalone script executors = { "exec-data-processing-op": 'data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg_path="{DATA_PVC_SDG_PATH}", model_path="{DATA_PVC_MODEL_PATH}", skills_path="{PREPROCESSED_DATA_SKILLS_PATH}", knowledge_path="{PREPROCESSED_DATA_KNOWLEDGE_PATH}")', - "exec-sdg-op": 'sdg_op(num_instructions_to_generate={num_instructions_to_generate}, pipeline="{sdg_pipeline}", repo_branch="{exec_git_clone_op_repo_branch or ""}", repo_pr={exec_git_clone_op_repo_pr or 0}, taxonomy_path="{TAXONOMY_DATA_PATH}", sdg_path="{DATA_PVC_SDG_PATH}")', + "exec-sdg-op": 
'sdg_op(num_instructions_to_generate={num_instructions_to_generate}, pipeline="{sdg_pipeline}", repo_branch="{exec_git_clone_op_repo_branch}", repo_pr={exec_git_clone_op_repo_pr}, taxonomy_path="{TAXONOMY_DATA_PATH}", sdg_path="{DATA_PVC_SDG_PATH}")', "exec-git-clone-op": {}, "exec-huggingface-importer-op": 'huggingface_importer_op(repo_name="{REPO_GRANITE_7B_IMAGE}", model_path="{DATA_PVC_MODEL_PATH}")', "exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",output_path="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}",models_path_prefix="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})', - "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy_path="{TAXONOMY_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, few_shots={FEW_SHOTS}, batch_size="{BATCH_SIZE}")', + "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy_path="{TAXONOMY_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", device=None, base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, model_dtype="{MODEL_DTYPE}", few_shots={FEW_SHOTS}, batch_size="{BATCH_SIZE}")', } details = {} diff --git a/pipeline.yaml b/pipeline.yaml index 9cf69eb7..6583ff4b 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -11,6 +11,7 @@ # mt_bench_merge_system_user_message: bool [Default: False] # sdg_base_model: str [Default: 'ibm-granite/granite-7b-base'] # sdg_max_batch_len: int [Default: 20000.0] +# sdg_only: bool [Default: False] # sdg_pipeline: str [Default: 'simple'] # sdg_repo_branch: str # sdg_repo_pr: int @@ -27,9 +28,1244 @@ # train_num_epochs_phase_2: int [Default: 2.0] # train_num_warmup_steps_phase_1: int [Default: 100.0] # train_num_warmup_steps_phase_2: int [Default: 100.0] +# train_only: bool [Default: False] # train_save_samples: int [Default: 0.0] # train_seed: int [Default: 42.0] components: + comp-condition-1: + dag: + tasks: + createpvc: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -sdg + size: + runtimeValue: + constant: 10Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc + createpvc-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-2 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -model-cache + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-2 + createpvc-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-3 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -output + size: + runtimeValue: + constant: 100Gi + 
storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-3 + deletepvc: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc + dependentTasks: + - createpvc-2 + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-2 + taskInfo: + name: deletepvc + deletepvc-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-2 + dependentTasks: + - createpvc + - createpvc-2 + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc + taskInfo: + name: deletepvc-2 + deletepvc-3: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-3 + dependentTasks: + - createpvc-2 + - createpvc-3 + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-3 + taskInfo: + name: deletepvc-3 + inputDefinitions: + parameters: + pipelinechannel--k8s_storage_class_name: + parameterType: STRING + pipelinechannel--train_only: + parameterType: BOOLEAN + comp-condition-2: + dag: + tasks: + createpvc-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-4 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -sdg + size: + runtimeValue: + constant: 10Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-4 + createpvc-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-5 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -model-cache + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-5 + createpvc-6: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-6 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -output + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-6 + deletepvc-4: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-4 + dependentTasks: + - createpvc-5 + - sdg-op + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-5 + taskInfo: + name: deletepvc-4 + deletepvc-5: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-5 + dependentTasks: + - createpvc-4 + - sdg-op + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-4 + taskInfo: + name: deletepvc-5 + deletepvc-6: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-6 + dependentTasks: + - createpvc-6 + - sdg-op + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-6 + taskInfo: + name: deletepvc-6 + git-clone-op: + cachingOptions: {} + componentRef: + name: comp-git-clone-op + dependentTasks: + - createpvc-4 + inputs: + parameters: + repo_branch: + componentInputParameter: pipelinechannel--sdg_repo_branch + repo_pr: + componentInputParameter: pipelinechannel--sdg_repo_pr + repo_url: + componentInputParameter: pipelinechannel--sdg_repo_url 
+ taskInfo: + name: git-clone-op + sdg-op: + cachingOptions: {} + componentRef: + name: comp-sdg-op + dependentTasks: + - createpvc-4 + - git-clone-op + inputs: + parameters: + num_instructions_to_generate: + componentInputParameter: pipelinechannel--sdg_scale_factor + pipeline: + componentInputParameter: pipelinechannel--sdg_pipeline + repo_branch: + componentInputParameter: pipelinechannel--sdg_repo_branch + repo_pr: + componentInputParameter: pipelinechannel--sdg_repo_pr + taskInfo: + name: sdg-op + sdg-to-artifact-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-sdg-to-artifact-op + dependentTasks: + - createpvc-4 + - git-clone-op + - sdg-op + taskInfo: + name: sdg-to-artifact-op + taxonomy-to-artifact-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-taxonomy-to-artifact-op + dependentTasks: + - createpvc-4 + - git-clone-op + - sdg-op + taskInfo: + name: taxonomy-to-artifact-op + inputDefinitions: + parameters: + pipelinechannel--k8s_storage_class_name: + parameterType: STRING + pipelinechannel--sdg_pipeline: + parameterType: STRING + pipelinechannel--sdg_repo_branch: + parameterType: STRING + pipelinechannel--sdg_repo_pr: + parameterType: NUMBER_INTEGER + pipelinechannel--sdg_repo_url: + parameterType: STRING + pipelinechannel--sdg_scale_factor: + parameterType: NUMBER_INTEGER + pipelinechannel--train_only: + parameterType: BOOLEAN + comp-condition-3: + dag: + tasks: + createpvc-7: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-7 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -sdg + size: + runtimeValue: + constant: 10Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-7 + createpvc-8: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-8 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -model-cache + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-8 + createpvc-9: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-9 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -output + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-9 + data-processing-op: + cachingOptions: {} + componentRef: + name: comp-data-processing-op + dependentTasks: + - createpvc-7 + - createpvc-9 + - get-training-data + - huggingface-importer-op + inputs: + parameters: + max_batch_len: + componentInputParameter: pipelinechannel--sdg_max_batch_len + taskInfo: + name: data-processing-op + deletepvc-7: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-7 + dependentTasks: + - createpvc-8 + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-8 + taskInfo: + name: deletepvc-7 + deletepvc-8: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-8 + dependentTasks: + - createpvc-7 + - createpvc-8 + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-7 + taskInfo: + name: deletepvc-8 + deletepvc-9: + 
cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-9 + dependentTasks: + - createpvc-8 + - createpvc-9 + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-9 + taskInfo: + name: deletepvc-9 + get-training-data: + cachingOptions: + enableCache: true + componentRef: + name: comp-get-training-data + taskInfo: + name: get-training-data + huggingface-importer-op: + cachingOptions: {} + componentRef: + name: comp-huggingface-importer-op + dependentTasks: + - createpvc-9 + inputs: + parameters: + repo_name: + componentInputParameter: pipelinechannel--sdg_base_model + taskInfo: + name: huggingface-importer-op + knowledge-processed-data-to-artifact-op: + cachingOptions: {} + componentRef: + name: comp-knowledge-processed-data-to-artifact-op + dependentTasks: + - createpvc-7 + - data-processing-op + taskInfo: + name: knowledge-processed-data-to-artifact-op + kubectl-apply-op: + cachingOptions: {} + componentRef: + name: comp-kubectl-apply-op + dependentTasks: + - data-processing-op + - huggingface-importer-op + - pytorchjob-manifest-op + inputs: + parameters: + manifest: + taskOutputParameter: + outputParameterKey: manifest + producerTask: pytorchjob-manifest-op + taskInfo: + name: kubectl-apply-op + kubectl-apply-op-2: + cachingOptions: {} + componentRef: + name: comp-kubectl-apply-op-2 + dependentTasks: + - pytorchjob-manifest-op-2 + inputs: + parameters: + manifest: + taskOutputParameter: + outputParameterKey: manifest + producerTask: pytorchjob-manifest-op-2 + taskInfo: + name: kubectl-apply-op-2 + kubectl-wait-for-op: + cachingOptions: {} + componentRef: + name: comp-kubectl-wait-for-op + dependentTasks: + - kubectl-apply-op + - pytorchjob-manifest-op + inputs: + parameters: + condition: + runtimeValue: + constant: condition=Succeeded + kind: + runtimeValue: + constant: pytorchjobs + name: + taskOutputParameter: + outputParameterKey: name + producerTask: pytorchjob-manifest-op + taskInfo: + name: kubectl-wait-for-op + kubectl-wait-for-op-2: + cachingOptions: {} + componentRef: + name: comp-kubectl-wait-for-op-2 + dependentTasks: + - kubectl-apply-op-2 + - pytorchjob-manifest-op-2 + inputs: + parameters: + condition: + runtimeValue: + constant: condition=Succeeded + kind: + runtimeValue: + constant: pytorchjobs + name: + taskOutputParameter: + outputParameterKey: name + producerTask: pytorchjob-manifest-op-2 + taskInfo: + name: kubectl-wait-for-op-2 + list-models-in-directory-op: + cachingOptions: {} + componentRef: + name: comp-list-models-in-directory-op + dependentTasks: + - createpvc-8 + - kubectl-wait-for-op-2 + inputs: + parameters: + models_folder: + runtimeValue: + constant: /output/phase_2/model/hf_format + taskInfo: + name: list-models-in-directory-op + pytorchjob-manifest-op: + cachingOptions: {} + componentRef: + name: comp-pytorchjob-manifest-op + dependentTasks: + - createpvc-7 + - createpvc-8 + - createpvc-9 + inputs: + parameters: + effective_batch_size: + componentInputParameter: pipelinechannel--train_effective_batch_size_phase_1 + input_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-7 + learning_rate: + componentInputParameter: pipelinechannel--train_learning_rate_phase_1 + max_batch_len: + componentInputParameter: pipelinechannel--train_max_batch_len + model_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-9 + name_suffix: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-7 + nnodes: + 
componentInputParameter: pipelinechannel--train_nnodes + nproc_per_node: + componentInputParameter: pipelinechannel--train_nproc_per_node + num_epochs: + componentInputParameter: pipelinechannel--train_num_epochs_phase_1 + num_warmup_steps: + componentInputParameter: pipelinechannel--train_num_warmup_steps_phase_1 + output_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-8 + phase_num: + runtimeValue: + constant: 1.0 + save_samples: + componentInputParameter: pipelinechannel--train_save_samples + seed: + componentInputParameter: pipelinechannel--train_seed + taskInfo: + name: pytorchjob-manifest-op + pytorchjob-manifest-op-2: + cachingOptions: {} + componentRef: + name: comp-pytorchjob-manifest-op-2 + dependentTasks: + - createpvc-7 + - createpvc-8 + - createpvc-9 + - kubectl-wait-for-op + inputs: + parameters: + effective_batch_size: + componentInputParameter: pipelinechannel--train_effective_batch_size_phase_2 + input_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-7 + learning_rate: + componentInputParameter: pipelinechannel--train_learning_rate_phase_2 + max_batch_len: + componentInputParameter: pipelinechannel--train_max_batch_len + model_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-9 + name_suffix: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-7 + nnodes: + componentInputParameter: pipelinechannel--train_nnodes + nproc_per_node: + componentInputParameter: pipelinechannel--train_nproc_per_node + num_epochs: + componentInputParameter: pipelinechannel--train_num_epochs_phase_2 + num_warmup_steps: + componentInputParameter: pipelinechannel--train_num_warmup_steps_phase_2 + output_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-8 + phase_num: + runtimeValue: + constant: 2.0 + save_samples: + componentInputParameter: pipelinechannel--train_save_samples + seed: + componentInputParameter: pipelinechannel--train_seed + taskInfo: + name: pytorchjob-manifest-op-2 + skills-processed-data-to-artifact-op: + cachingOptions: {} + componentRef: + name: comp-skills-processed-data-to-artifact-op + dependentTasks: + - createpvc-7 + - data-processing-op + taskInfo: + name: skills-processed-data-to-artifact-op + inputDefinitions: + parameters: + pipelinechannel--k8s_storage_class_name: + parameterType: STRING + pipelinechannel--sdg_base_model: + parameterType: STRING + pipelinechannel--sdg_max_batch_len: + parameterType: NUMBER_INTEGER + pipelinechannel--sdg_only: + parameterType: BOOLEAN + pipelinechannel--train_effective_batch_size_phase_1: + parameterType: NUMBER_INTEGER + pipelinechannel--train_effective_batch_size_phase_2: + parameterType: NUMBER_INTEGER + pipelinechannel--train_learning_rate_phase_1: + parameterType: NUMBER_DOUBLE + pipelinechannel--train_learning_rate_phase_2: + parameterType: NUMBER_DOUBLE + pipelinechannel--train_max_batch_len: + parameterType: NUMBER_INTEGER + pipelinechannel--train_nnodes: + parameterType: NUMBER_INTEGER + pipelinechannel--train_nproc_per_node: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_epochs_phase_1: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_epochs_phase_2: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_warmup_steps_phase_1: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_warmup_steps_phase_2: + parameterType: NUMBER_INTEGER + pipelinechannel--train_save_samples: + parameterType: NUMBER_INTEGER + 
pipelinechannel--train_seed: + parameterType: NUMBER_INTEGER + comp-condition-4: + dag: + tasks: + createpvc-10: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-10 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -sdg + size: + runtimeValue: + constant: 10Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-10 + createpvc-11: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-11 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -model-cache + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-11 + createpvc-12: + cachingOptions: + enableCache: true + componentRef: + name: comp-createpvc-12 + inputs: + parameters: + access_modes: + runtimeValue: + constant: + - ReadWriteMany + pvc_name_suffix: + runtimeValue: + constant: -output + size: + runtimeValue: + constant: 100Gi + storage_class_name: + componentInputParameter: pipelinechannel--k8s_storage_class_name + taskInfo: + name: createpvc-12 + data-processing-op-2: + cachingOptions: {} + componentRef: + name: comp-data-processing-op-2 + dependentTasks: + - createpvc-10 + - createpvc-12 + - huggingface-importer-op-2 + - sdg-op-2 + inputs: + parameters: + max_batch_len: + componentInputParameter: pipelinechannel--sdg_max_batch_len + taskInfo: + name: data-processing-op-2 + deletepvc-10: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-10 + dependentTasks: + - createpvc-11 + - run-final-eval-op + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-11 + taskInfo: + name: deletepvc-10 + deletepvc-11: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-11 + dependentTasks: + - createpvc-10 + - run-final-eval-op + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 + taskInfo: + name: deletepvc-11 + deletepvc-12: + cachingOptions: + enableCache: true + componentRef: + name: comp-deletepvc-12 + dependentTasks: + - createpvc-12 + - run-final-eval-op + inputs: + parameters: + pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-12 + taskInfo: + name: deletepvc-12 + git-clone-op-2: + cachingOptions: {} + componentRef: + name: comp-git-clone-op-2 + dependentTasks: + - createpvc-10 + inputs: + parameters: + repo_branch: + componentInputParameter: pipelinechannel--sdg_repo_branch + repo_pr: + componentInputParameter: pipelinechannel--sdg_repo_pr + repo_url: + componentInputParameter: pipelinechannel--sdg_repo_url + taskInfo: + name: git-clone-op-2 + huggingface-importer-op-2: + cachingOptions: {} + componentRef: + name: comp-huggingface-importer-op-2 + dependentTasks: + - createpvc-12 + inputs: + parameters: + repo_name: + componentInputParameter: pipelinechannel--sdg_base_model + taskInfo: + name: huggingface-importer-op-2 + knowledge-processed-data-to-artifact-op-2: + cachingOptions: {} + componentRef: + name: comp-knowledge-processed-data-to-artifact-op-2 + dependentTasks: + - createpvc-10 + - data-processing-op-2 + taskInfo: + name: knowledge-processed-data-to-artifact-op-2 + kubectl-apply-op-3: + cachingOptions: {} + componentRef: + name: 
comp-kubectl-apply-op-3 + dependentTasks: + - data-processing-op-2 + - huggingface-importer-op-2 + - pytorchjob-manifest-op-3 + inputs: + parameters: + manifest: + taskOutputParameter: + outputParameterKey: manifest + producerTask: pytorchjob-manifest-op-3 + taskInfo: + name: kubectl-apply-op-3 + kubectl-apply-op-4: + cachingOptions: {} + componentRef: + name: comp-kubectl-apply-op-4 + dependentTasks: + - pytorchjob-manifest-op-4 + inputs: + parameters: + manifest: + taskOutputParameter: + outputParameterKey: manifest + producerTask: pytorchjob-manifest-op-4 + taskInfo: + name: kubectl-apply-op-4 + kubectl-wait-for-op-3: + cachingOptions: {} + componentRef: + name: comp-kubectl-wait-for-op-3 + dependentTasks: + - kubectl-apply-op-3 + - pytorchjob-manifest-op-3 + inputs: + parameters: + condition: + runtimeValue: + constant: condition=Succeeded + kind: + runtimeValue: + constant: pytorchjobs + name: + taskOutputParameter: + outputParameterKey: name + producerTask: pytorchjob-manifest-op-3 + taskInfo: + name: kubectl-wait-for-op-3 + kubectl-wait-for-op-4: + cachingOptions: {} + componentRef: + name: comp-kubectl-wait-for-op-4 + dependentTasks: + - kubectl-apply-op-4 + - pytorchjob-manifest-op-4 + inputs: + parameters: + condition: + runtimeValue: + constant: condition=Succeeded + kind: + runtimeValue: + constant: pytorchjobs + name: + taskOutputParameter: + outputParameterKey: name + producerTask: pytorchjob-manifest-op-4 + taskInfo: + name: kubectl-wait-for-op-4 + list-models-in-directory-op-2: + cachingOptions: {} + componentRef: + name: comp-list-models-in-directory-op-2 + dependentTasks: + - createpvc-11 + - kubectl-wait-for-op-4 + inputs: + parameters: + models_folder: + runtimeValue: + constant: /output/phase_2/model/hf_format + taskInfo: + name: list-models-in-directory-op-2 + pvc-to-model-op: + cachingOptions: {} + componentRef: + name: comp-pvc-to-model-op + dependentTasks: + - createpvc-11 + - run-mt-bench-op + inputs: + parameters: + pvc_path: + runtimeValue: + constant: /output/phase_2/model/hf_format/candidate_model + taskInfo: + name: pvc-to-model-op + pvc-to-mt-bench-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-pvc-to-mt-bench-op + dependentTasks: + - createpvc-11 + - run-mt-bench-op + inputs: + parameters: + pvc_path: + runtimeValue: + constant: /output/mt_bench_data.json + taskInfo: + name: pvc-to-mt-bench-op + pytorchjob-manifest-op-3: + cachingOptions: {} + componentRef: + name: comp-pytorchjob-manifest-op-3 + dependentTasks: + - createpvc-10 + - createpvc-11 + - createpvc-12 + inputs: + parameters: + effective_batch_size: + componentInputParameter: pipelinechannel--train_effective_batch_size_phase_1 + input_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 + learning_rate: + componentInputParameter: pipelinechannel--train_learning_rate_phase_1 + max_batch_len: + componentInputParameter: pipelinechannel--train_max_batch_len + model_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-12 + name_suffix: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 + nnodes: + componentInputParameter: pipelinechannel--train_nnodes + nproc_per_node: + componentInputParameter: pipelinechannel--train_nproc_per_node + num_epochs: + componentInputParameter: pipelinechannel--train_num_epochs_phase_1 + num_warmup_steps: + componentInputParameter: pipelinechannel--train_num_warmup_steps_phase_1 + output_pvc_name: + taskOutputParameter: + outputParameterKey: name + 
producerTask: createpvc-11 + phase_num: + runtimeValue: + constant: 1.0 + save_samples: + componentInputParameter: pipelinechannel--train_save_samples + seed: + componentInputParameter: pipelinechannel--train_seed + taskInfo: + name: pytorchjob-manifest-op-3 + pytorchjob-manifest-op-4: + cachingOptions: {} + componentRef: + name: comp-pytorchjob-manifest-op-4 + dependentTasks: + - createpvc-10 + - createpvc-11 + - createpvc-12 + - kubectl-wait-for-op-3 + inputs: + parameters: + effective_batch_size: + componentInputParameter: pipelinechannel--train_effective_batch_size_phase_2 + input_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 + learning_rate: + componentInputParameter: pipelinechannel--train_learning_rate_phase_2 + max_batch_len: + componentInputParameter: pipelinechannel--train_max_batch_len + model_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-12 + name_suffix: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 + nnodes: + componentInputParameter: pipelinechannel--train_nnodes + nproc_per_node: + componentInputParameter: pipelinechannel--train_nproc_per_node + num_epochs: + componentInputParameter: pipelinechannel--train_num_epochs_phase_2 + num_warmup_steps: + componentInputParameter: pipelinechannel--train_num_warmup_steps_phase_2 + output_pvc_name: + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-11 + phase_num: + runtimeValue: + constant: 2.0 + save_samples: + componentInputParameter: pipelinechannel--train_save_samples + seed: + componentInputParameter: pipelinechannel--train_seed + taskInfo: + name: pytorchjob-manifest-op-4 + run-final-eval-op: + cachingOptions: + enableCache: true + componentRef: + name: comp-run-final-eval-op + dependentTasks: + - createpvc-10 + - createpvc-11 + - createpvc-12 + - run-mt-bench-op + inputs: + parameters: + base_branch: + componentInputParameter: pipelinechannel--sdg_repo_branch + base_model_dir: + runtimeValue: + constant: /model/ + batch_size: + componentInputParameter: pipelinechannel--final_eval_batch_size + candidate_branch: + componentInputParameter: pipelinechannel--sdg_repo_branch + candidate_model: + runtimeValue: + constant: /output/phase_2/model/hf_format/candidate_model + few_shots: + componentInputParameter: pipelinechannel--final_eval_few_shots + max_workers: + componentInputParameter: pipelinechannel--final_eval_max_workers + merge_system_user_message: + componentInputParameter: pipelinechannel--final_eval_merge_system_user_message + taskInfo: + name: run-final-eval-op + run-mt-bench-op: + cachingOptions: {} + componentRef: + name: comp-run-mt-bench-op + dependentTasks: + - createpvc-11 + - list-models-in-directory-op-2 + inputs: + parameters: + max_workers: + componentInputParameter: pipelinechannel--mt_bench_max_workers + merge_system_user_message: + componentInputParameter: pipelinechannel--mt_bench_merge_system_user_message + models_list: + taskOutputParameter: + outputParameterKey: Output + producerTask: list-models-in-directory-op-2 + models_path_prefix: + runtimeValue: + constant: /output/phase_2/model/hf_format + taskInfo: + name: run-mt-bench-op + sdg-op-2: + cachingOptions: {} + componentRef: + name: comp-sdg-op-2 + dependentTasks: + - createpvc-10 + - git-clone-op-2 + inputs: + parameters: + num_instructions_to_generate: + componentInputParameter: pipelinechannel--sdg_scale_factor + pipeline: + componentInputParameter: pipelinechannel--sdg_pipeline + repo_branch: + 
componentInputParameter: pipelinechannel--sdg_repo_branch + repo_pr: + componentInputParameter: pipelinechannel--sdg_repo_pr + taskInfo: + name: sdg-op-2 + sdg-to-artifact-op-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-sdg-to-artifact-op-2 + dependentTasks: + - createpvc-10 + - git-clone-op-2 + - sdg-op-2 + taskInfo: + name: sdg-to-artifact-op-2 + skills-processed-data-to-artifact-op-2: + cachingOptions: {} + componentRef: + name: comp-skills-processed-data-to-artifact-op-2 + dependentTasks: + - createpvc-10 + - data-processing-op-2 + taskInfo: + name: skills-processed-data-to-artifact-op-2 + taxonomy-to-artifact-op-2: + cachingOptions: + enableCache: true + componentRef: + name: comp-taxonomy-to-artifact-op-2 + dependentTasks: + - createpvc-10 + - git-clone-op-2 + - sdg-op-2 + taskInfo: + name: taxonomy-to-artifact-op-2 + inputDefinitions: + parameters: + pipelinechannel--final_eval_batch_size: + parameterType: STRING + pipelinechannel--final_eval_few_shots: + parameterType: NUMBER_INTEGER + pipelinechannel--final_eval_max_workers: + parameterType: STRING + pipelinechannel--final_eval_merge_system_user_message: + parameterType: BOOLEAN + pipelinechannel--k8s_storage_class_name: + parameterType: STRING + pipelinechannel--mt_bench_max_workers: + parameterType: STRING + pipelinechannel--mt_bench_merge_system_user_message: + parameterType: BOOLEAN + pipelinechannel--sdg_base_model: + parameterType: STRING + pipelinechannel--sdg_max_batch_len: + parameterType: NUMBER_INTEGER + pipelinechannel--sdg_pipeline: + parameterType: STRING + pipelinechannel--sdg_repo_branch: + parameterType: STRING + pipelinechannel--sdg_repo_pr: + parameterType: NUMBER_INTEGER + pipelinechannel--sdg_repo_url: + parameterType: STRING + pipelinechannel--sdg_scale_factor: + parameterType: NUMBER_INTEGER + pipelinechannel--train_effective_batch_size_phase_1: + parameterType: NUMBER_INTEGER + pipelinechannel--train_effective_batch_size_phase_2: + parameterType: NUMBER_INTEGER + pipelinechannel--train_learning_rate_phase_1: + parameterType: NUMBER_DOUBLE + pipelinechannel--train_learning_rate_phase_2: + parameterType: NUMBER_DOUBLE + pipelinechannel--train_max_batch_len: + parameterType: NUMBER_INTEGER + pipelinechannel--train_nnodes: + parameterType: NUMBER_INTEGER + pipelinechannel--train_nproc_per_node: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_epochs_phase_1: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_epochs_phase_2: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_warmup_steps_phase_1: + parameterType: NUMBER_INTEGER + pipelinechannel--train_num_warmup_steps_phase_2: + parameterType: NUMBER_INTEGER + pipelinechannel--train_only: + parameterType: BOOLEAN + pipelinechannel--train_save_samples: + parameterType: NUMBER_INTEGER + pipelinechannel--train_seed: + parameterType: NUMBER_INTEGER comp-createpvc: executorLabel: exec-createpvc inputDefinitions: @@ -91,8 +1327,8 @@ components: parameters: name: parameterType: STRING - comp-createpvc-2: - executorLabel: exec-createpvc-2 + comp-createpvc-10: + executorLabel: exec-createpvc-10 inputDefinitions: parameters: access_modes: @@ -152,8 +1388,8 @@ components: parameters: name: parameterType: STRING - comp-createpvc-3: - executorLabel: exec-createpvc-3 + comp-createpvc-11: + executorLabel: exec-createpvc-11 inputDefinitions: parameters: access_modes: @@ -213,438 +1449,1434 @@ components: parameters: name: parameterType: STRING - comp-data-processing-op: - executorLabel: exec-data-processing-op + 
comp-createpvc-12: + executorLabel: exec-createpvc-12 inputDefinitions: parameters: - knowledge_path: - defaultValue: /data/knowledge - isOptional: true - parameterType: STRING - max_batch_len: - defaultValue: 20000.0 - isOptional: true - parameterType: NUMBER_INTEGER - max_seq_len: - defaultValue: 4096.0 + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. isOptional: true - parameterType: NUMBER_INTEGER - model_path: - defaultValue: /model + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' isOptional: true parameterType: STRING - sdg_path: - defaultValue: /data/sdg + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true parameterType: STRING - skills_path: - defaultValue: /data/skills + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. + parameterType: STRING + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' isOptional: true parameterType: STRING - comp-deletepvc: - executorLabel: exec-deletepvc - inputDefinitions: - parameters: - pvc_name: - description: Name of the PVC to delete. Supports passing a runtime-generated - name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' + isOptional: true parameterType: STRING - comp-deletepvc-2: - executorLabel: exec-deletepvc-2 - inputDefinitions: + outputDefinitions: parameters: - pvc_name: - description: Name of the PVC to delete. Supports passing a runtime-generated - name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + name: parameterType: STRING - comp-deletepvc-3: - executorLabel: exec-deletepvc-3 + comp-createpvc-2: + executorLabel: exec-createpvc-2 inputDefinitions: parameters: + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. + isOptional: true + parameterType: STRUCT pvc_name: - description: Name of the PVC to delete. Supports passing a runtime-generated - name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. 
- parameterType: STRING - comp-git-clone-op: - executorLabel: exec-git-clone-op - inputDefinitions: - parameters: - repo_branch: - parameterType: STRING - repo_pr: - parameterType: NUMBER_INTEGER - repo_url: - parameterType: STRING - taxonomy_path: - defaultValue: /data/taxonomy + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' isOptional: true parameterType: STRING - comp-huggingface-importer-op: - executorLabel: exec-huggingface-importer-op - inputDefinitions: - parameters: - model_path: - defaultValue: /model + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true parameterType: STRING - repo_name: + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. parameterType: STRING - comp-knowledge-processed-data-to-artifact-op: - executorLabel: exec-knowledge-processed-data-to-artifact-op - inputDefinitions: - parameters: - pvc_path: - defaultValue: /data/knowledge + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' isOptional: true parameterType: STRING - outputDefinitions: - artifacts: - knowledge_processed_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - comp-kubectl-apply-op: - executorLabel: exec-kubectl-apply-op - inputDefinitions: - parameters: - manifest: - parameterType: STRING - comp-kubectl-apply-op-2: - executorLabel: exec-kubectl-apply-op-2 - inputDefinitions: - parameters: - manifest: + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' + isOptional: true parameterType: STRING - comp-kubectl-wait-for-op: - executorLabel: exec-kubectl-wait-for-op - inputDefinitions: + outputDefinitions: parameters: - condition: - parameterType: STRING - kind: - parameterType: STRING name: parameterType: STRING - comp-kubectl-wait-for-op-2: - executorLabel: exec-kubectl-wait-for-op-2 + comp-createpvc-3: + executorLabel: exec-createpvc-3 inputDefinitions: parameters: - condition: - parameterType: STRING - kind: - parameterType: STRING - name: - parameterType: STRING - comp-list-models-in-directory-op: - executorLabel: exec-list-models-in-directory-op - inputDefinitions: - parameters: - models_folder: - parameterType: STRING - outputDefinitions: - parameters: - Output: + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' 
parameterType: LIST - comp-pvc-to-model-op: - executorLabel: exec-pvc-to-model-op - inputDefinitions: - parameters: - pvc_path: - parameterType: STRING - outputDefinitions: - artifacts: - model: - artifactType: - schemaTitle: system.Model - schemaVersion: 0.0.1 - comp-pvc-to-mt-bench-op: - executorLabel: exec-pvc-to-mt-bench-op - inputDefinitions: - parameters: - pvc_path: - parameterType: STRING - outputDefinitions: - artifacts: - mt_bench_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-pytorchjob-manifest-op: - executorLabel: exec-pytorchjob-manifest-op - inputDefinitions: - parameters: - effective_batch_size: - defaultValue: 3840.0 + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. isOptional: true - parameterType: NUMBER_INTEGER - input_pvc_name: - parameterType: STRING - learning_rate: - defaultValue: 0.0001 + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' isOptional: true - parameterType: NUMBER_DOUBLE - max_batch_len: - defaultValue: 20000.0 + parameterType: STRING + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true - parameterType: NUMBER_INTEGER - model_pvc_name: parameterType: STRING - name_suffix: + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. parameterType: STRING - nnodes: - defaultValue: 2.0 - isOptional: true - parameterType: NUMBER_INTEGER - nproc_per_node: - defaultValue: 3.0 - isOptional: true - parameterType: NUMBER_INTEGER - num_epochs: - defaultValue: 2.0 - isOptional: true - parameterType: NUMBER_INTEGER - num_warmup_steps: - defaultValue: 800.0 + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' isOptional: true - parameterType: NUMBER_INTEGER - output_pvc_name: parameterType: STRING - phase_num: - parameterType: NUMBER_INTEGER - save_samples: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - seed: - defaultValue: 42.0 + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' isOptional: true - parameterType: NUMBER_INTEGER + parameterType: STRING outputDefinitions: parameters: - manifest: - parameterType: STRING name: parameterType: STRING - comp-pytorchjob-manifest-op-2: - executorLabel: exec-pytorchjob-manifest-op-2 + comp-createpvc-4: + executorLabel: exec-createpvc-4 inputDefinitions: parameters: - effective_batch_size: - defaultValue: 3840.0 + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. 
Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. isOptional: true - parameterType: NUMBER_INTEGER - input_pvc_name: - parameterType: STRING - learning_rate: - defaultValue: 0.0001 + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' isOptional: true - parameterType: NUMBER_DOUBLE - max_batch_len: - defaultValue: 20000.0 + parameterType: STRING + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true - parameterType: NUMBER_INTEGER - model_pvc_name: parameterType: STRING - name_suffix: + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. parameterType: STRING - nnodes: - defaultValue: 2.0 - isOptional: true - parameterType: NUMBER_INTEGER - nproc_per_node: - defaultValue: 3.0 - isOptional: true - parameterType: NUMBER_INTEGER - num_epochs: - defaultValue: 2.0 - isOptional: true - parameterType: NUMBER_INTEGER - num_warmup_steps: - defaultValue: 800.0 + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' isOptional: true - parameterType: NUMBER_INTEGER - output_pvc_name: parameterType: STRING - phase_num: - parameterType: NUMBER_INTEGER - save_samples: - defaultValue: 0.0 - isOptional: true - parameterType: NUMBER_INTEGER - seed: - defaultValue: 42.0 + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' isOptional: true - parameterType: NUMBER_INTEGER + parameterType: STRING outputDefinitions: parameters: - manifest: - parameterType: STRING name: parameterType: STRING - comp-run-final-eval-op: - executorLabel: exec-run-final-eval-op + comp-createpvc-5: + executorLabel: exec-createpvc-5 inputDefinitions: parameters: - base_branch: - parameterType: STRING - base_model_dir: - parameterType: STRING - batch_size: - parameterType: STRING - candidate_branch: - parameterType: STRING - candidate_model: + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. isOptional: true - parameterType: STRING - few_shots: - parameterType: NUMBER_INTEGER - max_workers: - parameterType: STRING - merge_system_user_message: - parameterType: BOOLEAN - sdg_path: - defaultValue: /input/sdg + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' 
isOptional: true parameterType: STRING - taxonomy_path: - defaultValue: /input/taxonomy + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true parameterType: STRING - outputDefinitions: - artifacts: - mmlu_branch_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - mt_bench_branch_output: - artifactType: - schemaTitle: system.Artifact - schemaVersion: 0.0.1 - comp-run-mt-bench-op: - executorLabel: exec-run-mt-bench-op + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. + parameterType: STRING + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' + isOptional: true + parameterType: STRING + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + name: + parameterType: STRING + comp-createpvc-6: + executorLabel: exec-createpvc-6 inputDefinitions: parameters: - best_score_file: + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. + isOptional: true + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' isOptional: true parameterType: STRING - max_workers: - parameterType: STRING - merge_system_user_message: - parameterType: BOOLEAN - models_folder: + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true parameterType: STRING - models_list: + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. + parameterType: STRING + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' isOptional: true - parameterType: LIST - models_path_prefix: parameterType: STRING - output_path: - defaultValue: /output/mt_bench_data.json + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' 
isOptional: true parameterType: STRING outputDefinitions: parameters: - best_model: + name: parameterType: STRING - best_score: - parameterType: NUMBER_DOUBLE - comp-sdg-op: - executorLabel: exec-sdg-op + comp-createpvc-7: + executorLabel: exec-createpvc-7 inputDefinitions: parameters: - num_instructions_to_generate: - parameterType: NUMBER_INTEGER - pipeline: - parameterType: STRING - repo_branch: + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. + isOptional: true + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' + isOptional: true parameterType: STRING - repo_pr: - parameterType: NUMBER_INTEGER - sdg_path: - defaultValue: /data/sdg + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' isOptional: true parameterType: STRING - taxonomy_path: - defaultValue: /data/taxonomy + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. + parameterType: STRING + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' isOptional: true parameterType: STRING - comp-sdg-to-artifact-op: - executorLabel: exec-sdg-to-artifact-op - inputDefinitions: - parameters: - pvc_path: - defaultValue: /data/sdg + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' isOptional: true parameterType: STRING outputDefinitions: - artifacts: - sdg: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - comp-skills-processed-data-to-artifact-op: - executorLabel: exec-skills-processed-data-to-artifact-op - inputDefinitions: parameters: - pvc_path: - defaultValue: /data/skills - isOptional: true + name: parameterType: STRING - outputDefinitions: - artifacts: - skills_processed_data: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 - comp-taxonomy-to-artifact-op: - executorLabel: exec-taxonomy-to-artifact-op + comp-createpvc-8: + executorLabel: exec-createpvc-8 inputDefinitions: parameters: - pvc_path: - defaultValue: /data/taxonomy + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. + isOptional: true + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. 
Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' isOptional: true parameterType: STRING - outputDefinitions: - artifacts: - taxonomy: - artifactType: - schemaTitle: system.Dataset - schemaVersion: 0.0.1 -deploymentSpec: - executors: - exec-createpvc: - container: - image: argostub/createpvc - exec-createpvc-2: - container: - image: argostub/createpvc - exec-createpvc-3: - container: - image: argostub/createpvc - exec-data-processing-op: - container: - args: - - --executor_input - - '{{$}}' - - --function_to_execute - - data_processing_op - command: - - sh - - -ec - - 'program_path=$(mktemp -d) + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + will take the form ``-``. Only one - printf "%s" "$0" > "$program_path/ephemeral_component.py" + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' + isOptional: true + parameterType: STRING + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. + parameterType: STRING + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV - _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + to back the PVC. ``None`` indicates to use the cluster''s default - ' - - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef data_processing_op(\n model_path: str = \"/model\",\n sdg_path:\ - \ str = \"/data/sdg\",\n skills_path: str = \"/data/skills\",\n knowledge_path:\ - \ str = \"/data/knowledge\",\n max_seq_len: Optional[int] = 4096,\n \ - \ max_batch_len: Optional[int] = 20000,\n):\n import os\n\n import\ + storage_class_name. Set to ``''''`` for a statically specified PVC.' + isOptional: true + parameterType: STRING + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + name: + parameterType: STRING + comp-createpvc-9: + executorLabel: exec-createpvc-9 + inputDefinitions: + parameters: + access_modes: + description: 'AccessModes to request for the provisioned PVC. May + + be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, + or + + ``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes + `_.' + parameterType: LIST + annotations: + description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations + `_. + isOptional: true + parameterType: STRUCT + pvc_name: + description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name + `_. + Only one of ``pvc_name`` and ``pvc_name_suffix`` can + + be provided.' + isOptional: true + parameterType: STRING + pvc_name_suffix: + description: 'Prefix to use for a dynamically generated name, which + + will take the form ``-``. Only one + + of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' + isOptional: true + parameterType: STRING + size: + description: The size of storage requested by the PVC that will be provisioned. + For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage + `_. 
+ parameterType: STRING + storage_class_name: + defaultValue: '' + description: 'Name of StorageClass from which to provision the PV + + to back the PVC. ``None`` indicates to use the cluster''s default + + storage_class_name. Set to ``''''`` for a statically specified PVC.' + isOptional: true + parameterType: STRING + volume_name: + description: 'Pre-existing PersistentVolume that should back the + + provisioned PersistentVolumeClaim. Used for statically + + specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName + `_.' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + name: + parameterType: STRING + comp-data-processing-op: + executorLabel: exec-data-processing-op + inputDefinitions: + parameters: + knowledge_path: + defaultValue: /data/knowledge + isOptional: true + parameterType: STRING + max_batch_len: + defaultValue: 20000.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_seq_len: + defaultValue: 4096.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_path: + defaultValue: /model + isOptional: true + parameterType: STRING + sdg_path: + defaultValue: /data/sdg + isOptional: true + parameterType: STRING + skills_path: + defaultValue: /data/skills + isOptional: true + parameterType: STRING + comp-data-processing-op-2: + executorLabel: exec-data-processing-op-2 + inputDefinitions: + parameters: + knowledge_path: + defaultValue: /data/knowledge + isOptional: true + parameterType: STRING + max_batch_len: + defaultValue: 20000.0 + isOptional: true + parameterType: NUMBER_INTEGER + max_seq_len: + defaultValue: 4096.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_path: + defaultValue: /model + isOptional: true + parameterType: STRING + sdg_path: + defaultValue: /data/sdg + isOptional: true + parameterType: STRING + skills_path: + defaultValue: /data/skills + isOptional: true + parameterType: STRING + comp-deletepvc: + executorLabel: exec-deletepvc + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-10: + executorLabel: exec-deletepvc-10 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-11: + executorLabel: exec-deletepvc-11 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-12: + executorLabel: exec-deletepvc-12 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-2: + executorLabel: exec-deletepvc-2 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-3: + executorLabel: exec-deletepvc-3 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. 
Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-4: + executorLabel: exec-deletepvc-4 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-5: + executorLabel: exec-deletepvc-5 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-6: + executorLabel: exec-deletepvc-6 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-7: + executorLabel: exec-deletepvc-7 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-8: + executorLabel: exec-deletepvc-8 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. + parameterType: STRING + comp-deletepvc-9: + executorLabel: exec-deletepvc-9 + inputDefinitions: + parameters: + pvc_name: + description: Name of the PVC to delete. Supports passing a runtime-generated + name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. 
+ parameterType: STRING + comp-get-training-data: + executorLabel: exec-get-training-data + comp-git-clone-op: + executorLabel: exec-git-clone-op + inputDefinitions: + parameters: + repo_branch: + parameterType: STRING + repo_pr: + parameterType: NUMBER_INTEGER + repo_url: + parameterType: STRING + taxonomy_path: + defaultValue: /data/taxonomy + isOptional: true + parameterType: STRING + comp-git-clone-op-2: + executorLabel: exec-git-clone-op-2 + inputDefinitions: + parameters: + repo_branch: + parameterType: STRING + repo_pr: + parameterType: NUMBER_INTEGER + repo_url: + parameterType: STRING + taxonomy_path: + defaultValue: /data/taxonomy + isOptional: true + parameterType: STRING + comp-huggingface-importer-op: + executorLabel: exec-huggingface-importer-op + inputDefinitions: + parameters: + model_path: + defaultValue: /model + isOptional: true + parameterType: STRING + repo_name: + parameterType: STRING + comp-huggingface-importer-op-2: + executorLabel: exec-huggingface-importer-op-2 + inputDefinitions: + parameters: + model_path: + defaultValue: /model + isOptional: true + parameterType: STRING + repo_name: + parameterType: STRING + comp-knowledge-processed-data-to-artifact-op: + executorLabel: exec-knowledge-processed-data-to-artifact-op + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/knowledge + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + knowledge_processed_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-knowledge-processed-data-to-artifact-op-2: + executorLabel: exec-knowledge-processed-data-to-artifact-op-2 + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/knowledge + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + knowledge_processed_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-kubectl-apply-op: + executorLabel: exec-kubectl-apply-op + inputDefinitions: + parameters: + manifest: + parameterType: STRING + comp-kubectl-apply-op-2: + executorLabel: exec-kubectl-apply-op-2 + inputDefinitions: + parameters: + manifest: + parameterType: STRING + comp-kubectl-apply-op-3: + executorLabel: exec-kubectl-apply-op-3 + inputDefinitions: + parameters: + manifest: + parameterType: STRING + comp-kubectl-apply-op-4: + executorLabel: exec-kubectl-apply-op-4 + inputDefinitions: + parameters: + manifest: + parameterType: STRING + comp-kubectl-wait-for-op: + executorLabel: exec-kubectl-wait-for-op + inputDefinitions: + parameters: + condition: + parameterType: STRING + kind: + parameterType: STRING + name: + parameterType: STRING + comp-kubectl-wait-for-op-2: + executorLabel: exec-kubectl-wait-for-op-2 + inputDefinitions: + parameters: + condition: + parameterType: STRING + kind: + parameterType: STRING + name: + parameterType: STRING + comp-kubectl-wait-for-op-3: + executorLabel: exec-kubectl-wait-for-op-3 + inputDefinitions: + parameters: + condition: + parameterType: STRING + kind: + parameterType: STRING + name: + parameterType: STRING + comp-kubectl-wait-for-op-4: + executorLabel: exec-kubectl-wait-for-op-4 + inputDefinitions: + parameters: + condition: + parameterType: STRING + kind: + parameterType: STRING + name: + parameterType: STRING + comp-list-models-in-directory-op: + executorLabel: exec-list-models-in-directory-op + inputDefinitions: + parameters: + models_folder: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-list-models-in-directory-op-2: + executorLabel: 
exec-list-models-in-directory-op-2 + inputDefinitions: + parameters: + models_folder: + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: LIST + comp-pvc-to-model-op: + executorLabel: exec-pvc-to-model-op + inputDefinitions: + parameters: + pvc_path: + parameterType: STRING + outputDefinitions: + artifacts: + model: + artifactType: + schemaTitle: system.Model + schemaVersion: 0.0.1 + comp-pvc-to-mt-bench-op: + executorLabel: exec-pvc-to-mt-bench-op + inputDefinitions: + parameters: + pvc_path: + parameterType: STRING + outputDefinitions: + artifacts: + mt_bench_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-pytorchjob-manifest-op: + executorLabel: exec-pytorchjob-manifest-op + inputDefinitions: + parameters: + effective_batch_size: + defaultValue: 3840.0 + isOptional: true + parameterType: NUMBER_INTEGER + input_pvc_name: + parameterType: STRING + learning_rate: + defaultValue: 0.0001 + isOptional: true + parameterType: NUMBER_DOUBLE + max_batch_len: + defaultValue: 20000.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_pvc_name: + parameterType: STRING + name_suffix: + parameterType: STRING + nnodes: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + nproc_per_node: + defaultValue: 3.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_epochs: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_warmup_steps: + defaultValue: 800.0 + isOptional: true + parameterType: NUMBER_INTEGER + output_pvc_name: + parameterType: STRING + phase_num: + parameterType: NUMBER_INTEGER + save_samples: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + seed: + defaultValue: 42.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + manifest: + parameterType: STRING + name: + parameterType: STRING + comp-pytorchjob-manifest-op-2: + executorLabel: exec-pytorchjob-manifest-op-2 + inputDefinitions: + parameters: + effective_batch_size: + defaultValue: 3840.0 + isOptional: true + parameterType: NUMBER_INTEGER + input_pvc_name: + parameterType: STRING + learning_rate: + defaultValue: 0.0001 + isOptional: true + parameterType: NUMBER_DOUBLE + max_batch_len: + defaultValue: 20000.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_pvc_name: + parameterType: STRING + name_suffix: + parameterType: STRING + nnodes: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + nproc_per_node: + defaultValue: 3.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_epochs: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_warmup_steps: + defaultValue: 800.0 + isOptional: true + parameterType: NUMBER_INTEGER + output_pvc_name: + parameterType: STRING + phase_num: + parameterType: NUMBER_INTEGER + save_samples: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + seed: + defaultValue: 42.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + manifest: + parameterType: STRING + name: + parameterType: STRING + comp-pytorchjob-manifest-op-3: + executorLabel: exec-pytorchjob-manifest-op-3 + inputDefinitions: + parameters: + effective_batch_size: + defaultValue: 3840.0 + isOptional: true + parameterType: NUMBER_INTEGER + input_pvc_name: + parameterType: STRING + learning_rate: + defaultValue: 0.0001 + isOptional: true + parameterType: NUMBER_DOUBLE + max_batch_len: + defaultValue: 20000.0 + isOptional: true + parameterType: 
NUMBER_INTEGER + model_pvc_name: + parameterType: STRING + name_suffix: + parameterType: STRING + nnodes: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + nproc_per_node: + defaultValue: 3.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_epochs: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_warmup_steps: + defaultValue: 800.0 + isOptional: true + parameterType: NUMBER_INTEGER + output_pvc_name: + parameterType: STRING + phase_num: + parameterType: NUMBER_INTEGER + save_samples: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + seed: + defaultValue: 42.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + manifest: + parameterType: STRING + name: + parameterType: STRING + comp-pytorchjob-manifest-op-4: + executorLabel: exec-pytorchjob-manifest-op-4 + inputDefinitions: + parameters: + effective_batch_size: + defaultValue: 3840.0 + isOptional: true + parameterType: NUMBER_INTEGER + input_pvc_name: + parameterType: STRING + learning_rate: + defaultValue: 0.0001 + isOptional: true + parameterType: NUMBER_DOUBLE + max_batch_len: + defaultValue: 20000.0 + isOptional: true + parameterType: NUMBER_INTEGER + model_pvc_name: + parameterType: STRING + name_suffix: + parameterType: STRING + nnodes: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + nproc_per_node: + defaultValue: 3.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_epochs: + defaultValue: 2.0 + isOptional: true + parameterType: NUMBER_INTEGER + num_warmup_steps: + defaultValue: 800.0 + isOptional: true + parameterType: NUMBER_INTEGER + output_pvc_name: + parameterType: STRING + phase_num: + parameterType: NUMBER_INTEGER + save_samples: + defaultValue: 0.0 + isOptional: true + parameterType: NUMBER_INTEGER + seed: + defaultValue: 42.0 + isOptional: true + parameterType: NUMBER_INTEGER + outputDefinitions: + parameters: + manifest: + parameterType: STRING + name: + parameterType: STRING + comp-run-final-eval-op: + executorLabel: exec-run-final-eval-op + inputDefinitions: + parameters: + base_branch: + parameterType: STRING + base_model_dir: + parameterType: STRING + batch_size: + parameterType: STRING + candidate_branch: + parameterType: STRING + candidate_model: + isOptional: true + parameterType: STRING + few_shots: + parameterType: NUMBER_INTEGER + max_workers: + parameterType: STRING + merge_system_user_message: + parameterType: BOOLEAN + sdg_path: + defaultValue: /input/sdg + isOptional: true + parameterType: STRING + taxonomy_path: + defaultValue: /input/taxonomy + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + mmlu_branch_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + mt_bench_branch_output: + artifactType: + schemaTitle: system.Artifact + schemaVersion: 0.0.1 + comp-run-mt-bench-op: + executorLabel: exec-run-mt-bench-op + inputDefinitions: + parameters: + best_score_file: + isOptional: true + parameterType: STRING + max_workers: + parameterType: STRING + merge_system_user_message: + parameterType: BOOLEAN + models_folder: + isOptional: true + parameterType: STRING + models_list: + isOptional: true + parameterType: LIST + models_path_prefix: + parameterType: STRING + output_path: + defaultValue: /output/mt_bench_data.json + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + best_model: + parameterType: STRING + best_score: + parameterType: NUMBER_DOUBLE + comp-sdg-op: + executorLabel: 
exec-sdg-op + inputDefinitions: + parameters: + num_instructions_to_generate: + parameterType: NUMBER_INTEGER + pipeline: + parameterType: STRING + repo_branch: + parameterType: STRING + repo_pr: + parameterType: NUMBER_INTEGER + sdg_path: + defaultValue: /data/sdg + isOptional: true + parameterType: STRING + taxonomy_path: + defaultValue: /data/taxonomy + isOptional: true + parameterType: STRING + comp-sdg-op-2: + executorLabel: exec-sdg-op-2 + inputDefinitions: + parameters: + num_instructions_to_generate: + parameterType: NUMBER_INTEGER + pipeline: + parameterType: STRING + repo_branch: + parameterType: STRING + repo_pr: + parameterType: NUMBER_INTEGER + sdg_path: + defaultValue: /data/sdg + isOptional: true + parameterType: STRING + taxonomy_path: + defaultValue: /data/taxonomy + isOptional: true + parameterType: STRING + comp-sdg-to-artifact-op: + executorLabel: exec-sdg-to-artifact-op + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/sdg + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + sdg: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-sdg-to-artifact-op-2: + executorLabel: exec-sdg-to-artifact-op-2 + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/sdg + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + sdg: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-skills-processed-data-to-artifact-op: + executorLabel: exec-skills-processed-data-to-artifact-op + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/skills + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + skills_processed_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-skills-processed-data-to-artifact-op-2: + executorLabel: exec-skills-processed-data-to-artifact-op-2 + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/skills + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + skills_processed_data: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-taxonomy-to-artifact-op: + executorLabel: exec-taxonomy-to-artifact-op + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/taxonomy + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + taxonomy: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 + comp-taxonomy-to-artifact-op-2: + executorLabel: exec-taxonomy-to-artifact-op-2 + inputDefinitions: + parameters: + pvc_path: + defaultValue: /data/taxonomy + isOptional: true + parameterType: STRING + outputDefinitions: + artifacts: + taxonomy: + artifactType: + schemaTitle: system.Dataset + schemaVersion: 0.0.1 +deploymentSpec: + executors: + exec-createpvc: + container: + image: argostub/createpvc + exec-createpvc-10: + container: + image: argostub/createpvc + exec-createpvc-11: + container: + image: argostub/createpvc + exec-createpvc-12: + container: + image: argostub/createpvc + exec-createpvc-2: + container: + image: argostub/createpvc + exec-createpvc-3: + container: + image: argostub/createpvc + exec-createpvc-4: + container: + image: argostub/createpvc + exec-createpvc-5: + container: + image: argostub/createpvc + exec-createpvc-6: + container: + image: argostub/createpvc + exec-createpvc-7: + container: + image: argostub/createpvc + exec-createpvc-8: + container: + image: argostub/createpvc + exec-createpvc-9: + container: + image: argostub/createpvc + exec-data-processing-op: + 
container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - data_processing_op + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef data_processing_op(\n model_path: str = \"/model\",\n sdg_path:\ + \ str = \"/data/sdg\",\n skills_path: str = \"/data/skills\",\n knowledge_path:\ + \ str = \"/data/knowledge\",\n max_seq_len: Optional[int] = 4096,\n \ + \ max_batch_len: Optional[int] = 20000,\n):\n import os\n\n import\ + \ instructlab.training.data_process as dp\n from instructlab.training\ + \ import (\n DataProcessArgs,\n TrainingArgs,\n )\n\n \ + \ # define training-specific arguments\n skill_training_args = TrainingArgs(\n\ + \ # define data-specific arguments\n model_path=model_path,\n\ + \ data_path=f\"{sdg_path}/skills_train_msgs*.jsonl\",\n data_output_dir=skills_path,\n\ + \ # define model-trianing parameters\n max_seq_len=max_seq_len,\n\ + \ max_batch_len=max_batch_len,\n # XXX(shanand): We don't\ + \ need the following arguments\n # for data processing. Added them\ + \ for now to avoid\n # Pydantic validation errors for TrainingArgs\n\ + \ ckpt_output_dir=\"data/saved_checkpoints\",\n num_epochs=2,\n\ + \ effective_batch_size=3840,\n save_samples=0,\n learning_rate=2e-6,\n\ + \ warmup_steps=800,\n is_padding_free=True,\n )\n\n \ + \ knowledge_training_args = TrainingArgs(\n # define data-specific\ + \ arguments\n model_path=model_path,\n data_path=f\"{sdg_path}/knowledge_train_msgs*.jsonl\"\ + ,\n data_output_dir=knowledge_path,\n # define model-trianing\ + \ parameters\n max_seq_len=max_seq_len,\n max_batch_len=max_batch_len,\n\ + \ # XXX(shanand): We don't need the following arguments\n \ + \ # for data processing. Added them for now to avoid\n # Pydantic\ + \ validation errors for TrainingArgs\n ckpt_output_dir=\"data/saved_checkpoints\"\ + ,\n num_epochs=2,\n effective_batch_size=3840,\n save_samples=0,\n\ + \ learning_rate=2e-6,\n warmup_steps=800,\n is_padding_free=True,\n\ + \ )\n\n def data_processing(train_args: TrainingArgs) -> None:\n \ + \ # early validation logic here\n if train_args.max_batch_len\ + \ < train_args.max_seq_len:\n raise ValueError(\n \ + \ f\"the 'max_batch_len' cannot be less than 'max_seq_len': {train_args.max_batch_len=}\ + \ < {train_args.max_seq_len=}\"\n )\n\n # process\ + \ the training data\n if not os.path.exists(train_args.data_output_dir):\n\ + \ os.makedirs(train_args.data_output_dir, exist_ok=True)\n \ + \ dp.main(\n DataProcessArgs(\n # XXX(osilkin):\ + \ make a decision here, either:\n # 1. the CLI is fully\ + \ responsible for managing where the data is written\n #\ + \ 2. 
we never cache it and simply write it to a tmp file every time.\n\ + \ #\n # An important reason for why #1 would\ + \ be preferable is in the case of OpenShift/SELinux\n # where\ + \ the user has a defined place for new temporary data to be written.\n \ + \ data_output_path=train_args.data_output_dir,\n \ + \ model_path=train_args.model_path,\n data_path=train_args.data_path,\n\ + \ max_seq_len=train_args.max_seq_len,\n chat_tmpl_path=train_args.chat_tmpl_path,\n\ + \ )\n )\n\n data_processing(train_args=skill_training_args)\n\ + \ data_processing(train_args=knowledge_training_args)\n\n" + image: quay.io/redhat-et/ilab:1.2 + exec-data-processing-op-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - data_processing_op + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef data_processing_op(\n model_path: str = \"/model\",\n sdg_path:\ + \ str = \"/data/sdg\",\n skills_path: str = \"/data/skills\",\n knowledge_path:\ + \ str = \"/data/knowledge\",\n max_seq_len: Optional[int] = 4096,\n \ + \ max_batch_len: Optional[int] = 20000,\n):\n import os\n\n import\ \ instructlab.training.data_process as dp\n from instructlab.training\ \ import (\n DataProcessArgs,\n TrainingArgs,\n )\n\n \ \ # define training-specific arguments\n skill_training_args = TrainingArgs(\n\ @@ -689,12 +2921,67 @@ deploymentSpec: exec-deletepvc: container: image: argostub/deletepvc + exec-deletepvc-10: + container: + image: argostub/deletepvc + exec-deletepvc-11: + container: + image: argostub/deletepvc + exec-deletepvc-12: + container: + image: argostub/deletepvc exec-deletepvc-2: container: image: argostub/deletepvc exec-deletepvc-3: container: image: argostub/deletepvc + exec-deletepvc-4: + container: + image: argostub/deletepvc + exec-deletepvc-5: + container: + image: argostub/deletepvc + exec-deletepvc-6: + container: + image: argostub/deletepvc + exec-deletepvc-7: + container: + image: argostub/deletepvc + exec-deletepvc-8: + container: + image: argostub/deletepvc + exec-deletepvc-9: + container: + image: argostub/deletepvc + exec-get-training-data: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - get_training_data + command: + - sh + - -c + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.9.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef get_training_data():\n import json\n pass\n\n" + image: registry.access.redhat.com/ubi9/toolbox exec-git-clone-op: container: args: @@ -706,10 +2993,53 @@ deploymentSpec: pull/{{$.inputs.parameters[''repo_pr'']}}/head:{{$.inputs.parameters[''repo_pr'']}} && git checkout {{$.inputs.parameters[''repo_pr'']}}; fi ' command: - - /bin/sh + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox + exec-git-clone-op-2: + container: + args: + - 'git clone {{$.inputs.parameters[''repo_url'']}} {{$.inputs.parameters[''taxonomy_path'']}} + && cd {{$.inputs.parameters[''taxonomy_path'']}} && if [ -n "{{$.inputs.parameters[''repo_branch'']}}" + ]; then git fetch origin {{$.inputs.parameters[''repo_branch'']}} && git + checkout {{$.inputs.parameters[''repo_branch'']}}; elif [ -n "{{$.inputs.parameters[''repo_pr'']}}" + ] && [ {{$.inputs.parameters[''repo_pr'']}} -gt 0 ]; then git fetch origin + pull/{{$.inputs.parameters[''repo_pr'']}}/head:{{$.inputs.parameters[''repo_pr'']}} + && git checkout {{$.inputs.parameters[''repo_pr'']}}; fi ' + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox + exec-huggingface-importer-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - huggingface_importer_op + command: + - sh - -c - image: registry.access.redhat.com/ubi9/toolbox - exec-huggingface-importer-op: + - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'huggingface_hub'\ + \ && \"$0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef huggingface_importer_op(repo_name: str, model_path: str = \"\ + /model\"):\n from huggingface_hub import snapshot_download\n\n snapshot_download(repo_id=repo_name,\ + \ cache_dir=\"/tmp\", local_dir=model_path)\n\n" + image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 + exec-huggingface-importer-op-2: container: args: - --executor_input @@ -738,55 +3068,309 @@ deploymentSpec: /model\"):\n from huggingface_hub import snapshot_download\n\n snapshot_download(repo_id=repo_name,\ \ cache_dir=\"/tmp\", local_dir=model_path)\n\n" image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 - exec-knowledge-processed-data-to-artifact-op: - container: - args: - - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['knowledge_processed_data'].path}} - command: - - /bin/sh - - -c - image: registry.access.redhat.com/ubi9/toolbox - exec-kubectl-apply-op: - container: - args: - - echo "{{$.inputs.parameters['manifest']}}" | kubectl apply -f - - command: - - /bin/sh - - -c - image: registry.redhat.io/openshift4/ose-cli - exec-kubectl-apply-op-2: - container: - args: - - echo "{{$.inputs.parameters['manifest']}}" | kubectl apply -f - - command: - - /bin/sh - - -c - image: registry.redhat.io/openshift4/ose-cli - exec-kubectl-wait-for-op: - container: - args: - - kubectl wait --for={{$.inputs.parameters['condition']}} {{$.inputs.parameters['kind']}}/{{$.inputs.parameters['name']}} - --timeout=24h - command: - - /bin/sh - - -c - image: registry.redhat.io/openshift4/ose-cli - exec-kubectl-wait-for-op-2: - container: - args: - - kubectl wait --for={{$.inputs.parameters['condition']}} {{$.inputs.parameters['kind']}}/{{$.inputs.parameters['name']}} - --timeout=24h - command: - - /bin/sh - - -c - image: registry.redhat.io/openshift4/ose-cli - exec-list-models-in-directory-op: + exec-knowledge-processed-data-to-artifact-op: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['knowledge_processed_data'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox + exec-knowledge-processed-data-to-artifact-op-2: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['knowledge_processed_data'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox + exec-kubectl-apply-op: + container: + args: + - echo "{{$.inputs.parameters['manifest']}}" | kubectl apply -f - + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-apply-op-2: + container: + args: + - echo "{{$.inputs.parameters['manifest']}}" | kubectl apply -f - + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-apply-op-3: + container: + args: + - echo "{{$.inputs.parameters['manifest']}}" | kubectl apply -f - + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-apply-op-4: + container: + 
args: + - echo "{{$.inputs.parameters['manifest']}}" | kubectl apply -f - + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-wait-for-op: + container: + args: + - kubectl wait --for={{$.inputs.parameters['condition']}} {{$.inputs.parameters['kind']}}/{{$.inputs.parameters['name']}} + --timeout=24h + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-wait-for-op-2: + container: + args: + - kubectl wait --for={{$.inputs.parameters['condition']}} {{$.inputs.parameters['kind']}}/{{$.inputs.parameters['name']}} + --timeout=24h + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-wait-for-op-3: + container: + args: + - kubectl wait --for={{$.inputs.parameters['condition']}} {{$.inputs.parameters['kind']}}/{{$.inputs.parameters['name']}} + --timeout=24h + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-kubectl-wait-for-op-4: + container: + args: + - kubectl wait --for={{$.inputs.parameters['condition']}} {{$.inputs.parameters['kind']}}/{{$.inputs.parameters['name']}} + --timeout=24h + command: + - /bin/sh + - -c + image: registry.redhat.io/openshift4/ose-cli + exec-list-models-in-directory-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - list_models_in_directory_op + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef list_models_in_directory_op(models_folder: str) -> List[str]:\n\ + \ import os\n\n models = os.listdir(models_folder)\n return models\n\ + \n" + image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 + exec-list-models-in-directory-op-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - list_models_in_directory_op + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef list_models_in_directory_op(models_folder: str) -> List[str]:\n\ + \ import os\n\n models = os.listdir(models_folder)\n return models\n\ + \n" + image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 + exec-pvc-to-model-op: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['model'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox + exec-pvc-to-mt-bench-op: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['mt_bench_output'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox + exec-pytorchjob-manifest-op: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - pytorchjob_manifest_op + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ 
*\n\ndef pytorchjob_manifest_op(\n model_pvc_name: str,\n input_pvc_name:\ + \ str,\n output_pvc_name: str,\n name_suffix: str,\n # path_to_model:\ + \ str,\n phase_num: int,\n nproc_per_node: int = 3,\n nnodes: int\ + \ = 2,\n num_epochs: int = 2,\n effective_batch_size: int = 3840,\n\ + \ learning_rate: float = 1e-4,\n num_warmup_steps: int = 800,\n \ + \ save_samples: int = 0,\n max_batch_len: int = 20000,\n seed: int\ + \ = 42,\n) -> NamedTuple(\"outputs\", manifest=str, name=str):\n import\ + \ inspect\n import os\n\n def list_phase1_final_model():\n \ + \ model_dir = \"/output/phase_1/model/hf_format\"\n models = os.listdir(model_dir)\n\ + \ newest_idx = max(\n (os.path.getmtime(f\"{model_dir}/{model}\"\ + ), i)\n for i, model in enumerate(models)\n )[-1]\n \ + \ newest_model = models[newest_idx]\n return f\"{model_dir}/{newest_model}\"\ + \n\n Outputs = NamedTuple(\"outputs\", manifest=str, name=str)\n name\ + \ = f\"train-phase-{phase_num}-{name_suffix.rstrip('-sdg')}\"\n\n if\ + \ phase_num == 1:\n path_to_model = \"/input_model\"\n path_to_data\ + \ = \"/input_data/knowledge/data.jsonl\"\n elif phase_num == 2:\n \ + \ path_to_model = list_phase1_final_model()\n path_to_data =\ + \ \"/input_data/skills/data.jsonl\"\n else:\n raise RuntimeError(f\"\ + Unsupported value of {phase_num=}\")\n\n image = \"quay.io/redhat-et/ilab:1.2\"\ + \n\n manifest = inspect.cleandoc(\n f\"\"\"\n apiVersion:\ + \ kubeflow.org/v1\n kind: PyTorchJob\n metadata:\n \ + \ name: {name}\n spec:\n nprocPerNode: \\\\\"{nproc_per_node}\\\ + \\\"\n pytorchReplicaSpecs:\n Master:\n \ + \ replicas: 1\n restartPolicy: OnFailure\n template:\n\ + \ metadata:\n annotations:\n \ + \ sidecar.istio.io/inject: 'false'\n spec:\n \ + \ containers:\n - args:\n \ + \ - |\n echo \"Running phase {phase_num}\"\ + \n echo \"Using {path_to_model} model for training\"\ + \n echo \"Using {path_to_data} data for training\"\ + \n mkdir -p /output/phase_{phase_num}/model;\n\ + \ mkdir -p /output/data;\n \ + \ torchrun --nnodes {nnodes} \\\n --nproc_per_node\ + \ {nproc_per_node} \\\n --node_rank \\$(RANK)\ + \ \\\n --rdzv_endpoint \\$(MASTER_ADDR):\\\ + $(MASTER_PORT) \\\n -m instructlab.training.main_ds\ + \ \\\n --model_name_or_path={path_to_model}\ + \ \\\n --data_path={path_to_data} \\\n \ + \ --output_dir=/output/phase_{phase_num}/model\ + \ \\\n --num_epochs={num_epochs} \\\n \ + \ --effective_batch_size={effective_batch_size}\ + \ \\\n --learning_rate={learning_rate} \\\n\ + \ --num_warmup_steps={num_warmup_steps} \\\n\ + \ --save_samples={save_samples} \\\n \ + \ --log_level=INFO \\\n \ + \ --max_batch_len={max_batch_len} \\\n \ + \ --seed={seed} \\\n --cpu_offload_optimizer\ + \ \\\n --cpu_offload_params \\\n \ + \ --distributed_training_framework fsdp \\\n \ + \ --is_granite \\\n --checkpoint_at_epoch\n\ + \ command:\n - /bin/bash\n \ + \ - '-c'\n - '--'\n \ + \ image: {image}\n name: pytorch\n \ + \ volumeMounts:\n - mountPath:\ + \ /input_data\n name: input-data\n \ + \ readOnly: true\n - mountPath: /input_model\n\ + \ name: model\n readOnly:\ + \ true\n - mountPath: /output\n \ + \ name: output\n env:\n \ + \ - name: NNODES\n value: \\\\\"{nnodes}\\\\\ + \"\n - name: NPROC_PER_NODE\n \ + \ value: \\\\\"{nproc_per_node}\\\\\"\n - name:\ + \ XDG_CACHE_HOME\n value: /tmp\n \ + \ - name: TRITON_CACHE_DIR\n value: /tmp\n\ + \ - name: HF_HOME\n value:\ + \ /tmp\n - name: TRANSFORMERS_CACHE\n \ + \ value: /tmp\n resources:\n \ + \ requests:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n \ + \ limits:\n cpu: 8\n \ + \ 
\"nvidia.com/gpu\": {nproc_per_node}\n volumes:\n\ + \ - name: input-data\n persistentVolumeClaim:\n\ + \ claimName: {input_pvc_name}\n \ + \ - name: model\n persistentVolumeClaim:\n \ + \ claimName: {model_pvc_name}\n - name:\ + \ output\n persistentVolumeClaim:\n \ + \ claimName: {output_pvc_name}\n Worker:\n \ + \ replicas: {nnodes-1}\n restartPolicy: OnFailure\n \ + \ template:\n metadata:\n annotations:\n\ + \ sidecar.istio.io/inject: 'false'\n spec:\n\ + \ containers:\n - args:\n \ + \ - |\n echo \"Running phase {phase_num}\"\ + \n echo \"Using {path_to_model} model for training\"\ + \n echo \"Using {path_to_data} data for training\"\ + \n mkdir -p /tmp/model;\n \ + \ torchrun --nnodes {nnodes} \\\n --nproc_per_node\ + \ {nproc_per_node} \\\n --node_rank \\$(RANK)\ + \ \\\n --rdzv_endpoint \\$(MASTER_ADDR):\\$(MASTER_PORT)\ + \ \\\n -m instructlab.training.main_ds \\\n \ + \ --model_name_or_path={path_to_model} \\\n \ + \ --data_path={path_to_data} \\\n \ + \ --output_dir=/tmp/model \\\n \ + \ --num_epochs={num_epochs} \\\n --effective_batch_size={effective_batch_size}\ + \ \\\n --learning_rate={learning_rate} \\\n \ + \ --num_warmup_steps={num_warmup_steps} \\\n \ + \ --save_samples={save_samples} \\\n \ + \ --log_level=INFO \\\n --max_batch_len={max_batch_len}\ + \ \\\n --seed={seed} \\\n \ + \ --cpu_offload_optimizer \\\n --cpu_offload_params\ + \ \\\n --distributed_training_framework fsdp\ + \ \\\n --is_granite \\\n \ + \ --checkpoint_at_epoch\n command:\n \ + \ - /bin/bash\n - '-c'\n \ + \ - '--'\n image: {image}\n \ + \ name: pytorch\n volumeMounts:\n \ + \ - mountPath: /input_data\n \ + \ name: input-data\n readOnly: true\n \ + \ - mountPath: /input_model\n name:\ + \ model\n readOnly: true\n \ + \ - mountPath: /output\n name: output\n \ + \ readOnly: true\n env:\n \ + \ - name: NNODES\n value: \\\ + \\\"{nnodes}\\\\\"\n - name: NPROC_PER_NODE\n \ + \ value: \\\\\"{nproc_per_node}\\\\\"\n \ + \ - name: XDG_CACHE_HOME\n value:\ + \ /tmp\n - name: TRITON_CACHE_DIR\n \ + \ value: /tmp\n - name: HF_HOME\n \ + \ value: /tmp\n - name: TRANSFORMERS_CACHE\n\ + \ value: /tmp\n resources:\n\ + \ requests:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n \ + \ limits:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n \ + \ volumes:\n - name: input-data\n \ + \ persistentVolumeClaim:\n claimName: {input_pvc_name}\n\ + \ - name: model\n persistentVolumeClaim:\n\ + \ claimName: {model_pvc_name}\n \ + \ - name: output\n persistentVolumeClaim:\n \ + \ claimName: {output_pvc_name}\n \"\"\"\n )\n\ + \n return Outputs(manifest, name)\n\n" + image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 + exec-pytorchjob-manifest-op-2: container: args: - --executor_input - '{{$}}' - --function_to_execute - - list_models_in_directory_op + - pytorchjob_manifest_op command: - sh - -ec @@ -799,27 +3383,138 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ - \ *\n\ndef list_models_in_directory_op(models_folder: str) -> List[str]:\n\ - \ import os\n\n models = os.listdir(models_folder)\n return models\n\ - \n" - image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 - exec-pvc-to-model-op: - container: - args: - - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['model'].path}} - command: - - /bin/sh - - -c - image: registry.access.redhat.com/ubi9/toolbox - exec-pvc-to-mt-bench-op: - container: - args: - - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['mt_bench_output'].path}} - 
command: - - /bin/sh - - -c - image: registry.access.redhat.com/ubi9/toolbox - exec-pytorchjob-manifest-op: + \ *\n\ndef pytorchjob_manifest_op(\n model_pvc_name: str,\n input_pvc_name:\ + \ str,\n output_pvc_name: str,\n name_suffix: str,\n # path_to_model:\ + \ str,\n phase_num: int,\n nproc_per_node: int = 3,\n nnodes: int\ + \ = 2,\n num_epochs: int = 2,\n effective_batch_size: int = 3840,\n\ + \ learning_rate: float = 1e-4,\n num_warmup_steps: int = 800,\n \ + \ save_samples: int = 0,\n max_batch_len: int = 20000,\n seed: int\ + \ = 42,\n) -> NamedTuple(\"outputs\", manifest=str, name=str):\n import\ + \ inspect\n import os\n\n def list_phase1_final_model():\n \ + \ model_dir = \"/output/phase_1/model/hf_format\"\n models = os.listdir(model_dir)\n\ + \ newest_idx = max(\n (os.path.getmtime(f\"{model_dir}/{model}\"\ + ), i)\n for i, model in enumerate(models)\n )[-1]\n \ + \ newest_model = models[newest_idx]\n return f\"{model_dir}/{newest_model}\"\ + \n\n Outputs = NamedTuple(\"outputs\", manifest=str, name=str)\n name\ + \ = f\"train-phase-{phase_num}-{name_suffix.rstrip('-sdg')}\"\n\n if\ + \ phase_num == 1:\n path_to_model = \"/input_model\"\n path_to_data\ + \ = \"/input_data/knowledge/data.jsonl\"\n elif phase_num == 2:\n \ + \ path_to_model = list_phase1_final_model()\n path_to_data =\ + \ \"/input_data/skills/data.jsonl\"\n else:\n raise RuntimeError(f\"\ + Unsupported value of {phase_num=}\")\n\n image = \"quay.io/redhat-et/ilab:1.2\"\ + \n\n manifest = inspect.cleandoc(\n f\"\"\"\n apiVersion:\ + \ kubeflow.org/v1\n kind: PyTorchJob\n metadata:\n \ + \ name: {name}\n spec:\n nprocPerNode: \\\\\"{nproc_per_node}\\\ + \\\"\n pytorchReplicaSpecs:\n Master:\n \ + \ replicas: 1\n restartPolicy: OnFailure\n template:\n\ + \ metadata:\n annotations:\n \ + \ sidecar.istio.io/inject: 'false'\n spec:\n \ + \ containers:\n - args:\n \ + \ - |\n echo \"Running phase {phase_num}\"\ + \n echo \"Using {path_to_model} model for training\"\ + \n echo \"Using {path_to_data} data for training\"\ + \n mkdir -p /output/phase_{phase_num}/model;\n\ + \ mkdir -p /output/data;\n \ + \ torchrun --nnodes {nnodes} \\\n --nproc_per_node\ + \ {nproc_per_node} \\\n --node_rank \\$(RANK)\ + \ \\\n --rdzv_endpoint \\$(MASTER_ADDR):\\\ + $(MASTER_PORT) \\\n -m instructlab.training.main_ds\ + \ \\\n --model_name_or_path={path_to_model}\ + \ \\\n --data_path={path_to_data} \\\n \ + \ --output_dir=/output/phase_{phase_num}/model\ + \ \\\n --num_epochs={num_epochs} \\\n \ + \ --effective_batch_size={effective_batch_size}\ + \ \\\n --learning_rate={learning_rate} \\\n\ + \ --num_warmup_steps={num_warmup_steps} \\\n\ + \ --save_samples={save_samples} \\\n \ + \ --log_level=INFO \\\n \ + \ --max_batch_len={max_batch_len} \\\n \ + \ --seed={seed} \\\n --cpu_offload_optimizer\ + \ \\\n --cpu_offload_params \\\n \ + \ --distributed_training_framework fsdp \\\n \ + \ --is_granite \\\n --checkpoint_at_epoch\n\ + \ command:\n - /bin/bash\n \ + \ - '-c'\n - '--'\n \ + \ image: {image}\n name: pytorch\n \ + \ volumeMounts:\n - mountPath:\ + \ /input_data\n name: input-data\n \ + \ readOnly: true\n - mountPath: /input_model\n\ + \ name: model\n readOnly:\ + \ true\n - mountPath: /output\n \ + \ name: output\n env:\n \ + \ - name: NNODES\n value: \\\\\"{nnodes}\\\\\ + \"\n - name: NPROC_PER_NODE\n \ + \ value: \\\\\"{nproc_per_node}\\\\\"\n - name:\ + \ XDG_CACHE_HOME\n value: /tmp\n \ + \ - name: TRITON_CACHE_DIR\n value: /tmp\n\ + \ - name: HF_HOME\n value:\ + \ /tmp\n - name: TRANSFORMERS_CACHE\n \ + \ value: /tmp\n 
resources:\n \ + \ requests:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n \ + \ limits:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n volumes:\n\ + \ - name: input-data\n persistentVolumeClaim:\n\ + \ claimName: {input_pvc_name}\n \ + \ - name: model\n persistentVolumeClaim:\n \ + \ claimName: {model_pvc_name}\n - name:\ + \ output\n persistentVolumeClaim:\n \ + \ claimName: {output_pvc_name}\n Worker:\n \ + \ replicas: {nnodes-1}\n restartPolicy: OnFailure\n \ + \ template:\n metadata:\n annotations:\n\ + \ sidecar.istio.io/inject: 'false'\n spec:\n\ + \ containers:\n - args:\n \ + \ - |\n echo \"Running phase {phase_num}\"\ + \n echo \"Using {path_to_model} model for training\"\ + \n echo \"Using {path_to_data} data for training\"\ + \n mkdir -p /tmp/model;\n \ + \ torchrun --nnodes {nnodes} \\\n --nproc_per_node\ + \ {nproc_per_node} \\\n --node_rank \\$(RANK)\ + \ \\\n --rdzv_endpoint \\$(MASTER_ADDR):\\$(MASTER_PORT)\ + \ \\\n -m instructlab.training.main_ds \\\n \ + \ --model_name_or_path={path_to_model} \\\n \ + \ --data_path={path_to_data} \\\n \ + \ --output_dir=/tmp/model \\\n \ + \ --num_epochs={num_epochs} \\\n --effective_batch_size={effective_batch_size}\ + \ \\\n --learning_rate={learning_rate} \\\n \ + \ --num_warmup_steps={num_warmup_steps} \\\n \ + \ --save_samples={save_samples} \\\n \ + \ --log_level=INFO \\\n --max_batch_len={max_batch_len}\ + \ \\\n --seed={seed} \\\n \ + \ --cpu_offload_optimizer \\\n --cpu_offload_params\ + \ \\\n --distributed_training_framework fsdp\ + \ \\\n --is_granite \\\n \ + \ --checkpoint_at_epoch\n command:\n \ + \ - /bin/bash\n - '-c'\n \ + \ - '--'\n image: {image}\n \ + \ name: pytorch\n volumeMounts:\n \ + \ - mountPath: /input_data\n \ + \ name: input-data\n readOnly: true\n \ + \ - mountPath: /input_model\n name:\ + \ model\n readOnly: true\n \ + \ - mountPath: /output\n name: output\n \ + \ readOnly: true\n env:\n \ + \ - name: NNODES\n value: \\\ + \\\"{nnodes}\\\\\"\n - name: NPROC_PER_NODE\n \ + \ value: \\\\\"{nproc_per_node}\\\\\"\n \ + \ - name: XDG_CACHE_HOME\n value:\ + \ /tmp\n - name: TRITON_CACHE_DIR\n \ + \ value: /tmp\n - name: HF_HOME\n \ + \ value: /tmp\n - name: TRANSFORMERS_CACHE\n\ + \ value: /tmp\n resources:\n\ + \ requests:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n \ + \ limits:\n cpu: 8\n \ + \ \"nvidia.com/gpu\": {nproc_per_node}\n \ + \ volumes:\n - name: input-data\n \ + \ persistentVolumeClaim:\n claimName: {input_pvc_name}\n\ + \ - name: model\n persistentVolumeClaim:\n\ + \ claimName: {model_pvc_name}\n \ + \ - name: output\n persistentVolumeClaim:\n \ + \ claimName: {output_pvc_name}\n \"\"\"\n )\n\ + \n return Outputs(manifest, name)\n\n" + image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 + exec-pytorchjob-manifest-op-3: container: args: - --executor_input @@ -969,7 +3664,7 @@ deploymentSpec: \ claimName: {output_pvc_name}\n \"\"\"\n )\n\ \n return Outputs(manifest, name)\n\n" image: quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111 - exec-pytorchjob-manifest-op-2: + exec-pytorchjob-manifest-op-4: container: args: - --executor_input @@ -1377,8 +4072,6 @@ deploymentSpec: resources: accelerator: count: '1' - resourceCount: '1' - resourceType: nvidia.com/gpu type: nvidia.com/gpu exec-run-mt-bench-op: container: @@ -1516,8 +4209,6 @@ deploymentSpec: resources: accelerator: count: '1' - resourceCount: '1' - resourceType: nvidia.com/gpu type: nvidia.com/gpu exec-sdg-op: container: @@ -1532,6 +4223,60 @@ deploymentSpec: - 
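
The `@@ -1377` and `@@ -1516` hunks above trim the compiled accelerator spec down to `count` and `type`, dropping the duplicate `resourceCount`/`resourceType` keys; this is consistent with the `sdkVersion` change from kfp-2.10.1 to kfp-2.9.0 later in the file, since different SDK versions serialize the accelerator block with different keys. On the authoring side the block is produced by task-level calls in the KFP v2 SDK; a minimal sketch (the component is a stand-in, not the actual pipeline.py wiring):

from kfp import dsl

@dsl.component
def gpu_task():
    pass

@dsl.pipeline()
def accelerator_demo():
    # Requesting one NVIDIA GPU; this compiles to the
    # accelerator: {type: nvidia.com/gpu, count: '1'} block seen above.
    task = gpu_task()
    task.set_accelerator_type("nvidia.com/gpu")
    task.set_accelerator_limit(1)
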
'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef sdg_op(\n num_instructions_to_generate: int,\n pipeline:\ + \ str,\n repo_branch: Optional[str],\n repo_pr: Optional[int],\n \ + \ taxonomy_path: str = \"/data/taxonomy\",\n sdg_path: str = \"/data/sdg\"\ + ,\n):\n from os import getenv, path\n\n import openai\n import\ + \ yaml\n from instructlab.sdg import generate_data\n from instructlab.sdg.utils.taxonomy\ + \ import read_taxonomy\n\n SAMPLING_SIZE = 70\n\n def set_precomputed_skills_data_ratio(sampling_size):\n\ + \ skills_recipe = \"/usr/share/instructlab/sdg/default_data_recipes/skills.yaml\"\ + \n if path.exists(skills_recipe):\n with open(skills_recipe,\ + \ \"r\") as file:\n skills_yaml = yaml.load(file, Loader=yaml.Loader)\n\ + \n skills_yaml[\"datasets\"][0][\"sampling_size\"] = sampling_size\n\ + \n with open(skills_recipe, \"w\", encoding=\"utf-8\") as file:\n\ + \ yaml.dump(skills_yaml, file)\n\n api_key = getenv(\"\ + api_key\")\n model = getenv(\"model\")\n endpoint = getenv(\"endpoint\"\ + )\n\n if sdg_ca_cert := getenv(\"SDG_CA_CERT_PATH\"):\n import\ + \ httpx\n\n custom_http_client = httpx.Client(verify=sdg_ca_cert)\n\ + \ client = openai.OpenAI(\n base_url=endpoint, api_key=api_key,\ + \ http_client=custom_http_client\n )\n else:\n client =\ + \ openai.OpenAI(base_url=endpoint, api_key=api_key)\n\n taxonomy_base\ + \ = \"main\" if repo_branch or (repo_pr and int(repo_pr) > 0) else \"empty\"\ + \n\n print(\"Generating synthetic dataset for:\")\n print()\n print(read_taxonomy(taxonomy_path,\ + \ taxonomy_base))\n\n # Temporary measure to limit the amount of precomputed\ + \ skills data used to construct the SDG dataset.\n # Need during development\ + \ to decrease training loop times and the cost of model quality.\n set_precomputed_skills_data_ratio(sampling_size=SAMPLING_SIZE)\n\ + \n # generate_data has a magic word for its taxonomy_base argument -\ + \ 'empty'\n # it allows generating from the whole repo, see:\n # https://github.com/instructlab/sdg/blob/c6a9e74a1618b1077cd38e713b8aaed8b7c0c8ce/src/instructlab/sdg/utils/taxonomy.py#L230\n\ + \ generate_data(\n client=client,\n num_instructions_to_generate=num_instructions_to_generate,\n\ + \ output_dir=sdg_path,\n taxonomy=taxonomy_path,\n \ + \ taxonomy_base=taxonomy_base,\n model_name=model,\n pipeline=pipeline,\n\ + \ chunk_word_count=1000,\n server_ctx_size=4096,\n )\n\n" + env: + - name: HOME + value: /tmp + - name: HF_HOME + value: /tmp + image: quay.io/redhat-et/ilab:1.2 + exec-sdg-op-2: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - sdg_op + command: + - sh + - -ec + - 'program_path=$(mktemp -d) + + printf "%s" "$0" > "$program_path/ephemeral_component.py" _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" @@ -1581,6 +4326,14 @@ deploymentSpec: - /bin/sh - -c image: registry.access.redhat.com/ubi9/toolbox + exec-sdg-to-artifact-op-2: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['sdg'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox exec-skills-processed-data-to-artifact-op: container: args: @@ -1589,6 +4342,14 @@ deploymentSpec: - /bin/sh - -c image: 
registry.access.redhat.com/ubi9/toolbox + exec-skills-processed-data-to-artifact-op-2: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['skills_processed_data'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox exec-taxonomy-to-artifact-op: container: args: @@ -1597,6 +4358,14 @@ deploymentSpec: - /bin/sh - -c image: registry.access.redhat.com/ubi9/toolbox + exec-taxonomy-to-artifact-op-2: + container: + args: + - cp -r {{$.inputs.parameters['pvc_path']}} {{$.outputs.artifacts['taxonomy'].path}} + command: + - /bin/sh + - -c + image: registry.access.redhat.com/ubi9/toolbox pipelineInfo: description: InstructLab pipeline displayName: InstructLab @@ -1604,489 +4373,150 @@ pipelineInfo: root: dag: tasks: - createpvc: - cachingOptions: - enableCache: true - componentRef: - name: comp-createpvc - inputs: - parameters: - access_modes: - runtimeValue: - constant: - - ReadWriteMany - pvc_name_suffix: - runtimeValue: - constant: -sdg - size: - runtimeValue: - constant: 10Gi - storage_class_name: - componentInputParameter: k8s_storage_class_name - taskInfo: - name: createpvc - createpvc-2: - cachingOptions: - enableCache: true + condition-1: componentRef: - name: comp-createpvc-2 + name: comp-condition-1 inputs: parameters: - access_modes: - runtimeValue: - constant: - - ReadWriteMany - pvc_name_suffix: - runtimeValue: - constant: -model-cache - size: - runtimeValue: - constant: 100Gi - storage_class_name: + pipelinechannel--k8s_storage_class_name: componentInputParameter: k8s_storage_class_name + pipelinechannel--train_only: + componentInputParameter: train_only taskInfo: - name: createpvc-2 - createpvc-3: - cachingOptions: - enableCache: true + name: Skip Condition + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--train_only'] == true + condition-2: componentRef: - name: comp-createpvc-3 + name: comp-condition-2 inputs: parameters: - access_modes: - runtimeValue: - constant: - - ReadWriteMany - pvc_name_suffix: - runtimeValue: - constant: -output - size: - runtimeValue: - constant: 100Gi - storage_class_name: + pipelinechannel--k8s_storage_class_name: componentInputParameter: k8s_storage_class_name - taskInfo: - name: createpvc-3 - data-processing-op: - cachingOptions: {} - componentRef: - name: comp-data-processing-op - dependentTasks: - - createpvc - - createpvc-2 - - huggingface-importer-op - - sdg-op - inputs: - parameters: - max_batch_len: - componentInputParameter: sdg_max_batch_len - taskInfo: - name: data-processing-op - deletepvc: - cachingOptions: - enableCache: true - componentRef: - name: comp-deletepvc - dependentTasks: - - createpvc-3 - - pvc-to-model-op - - pvc-to-mt-bench-op - - run-final-eval-op - inputs: - parameters: - pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc-3 - taskInfo: - name: deletepvc - deletepvc-2: - cachingOptions: - enableCache: true - componentRef: - name: comp-deletepvc-2 - dependentTasks: - - createpvc - - run-final-eval-op - inputs: - parameters: - pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc - taskInfo: - name: deletepvc-2 - deletepvc-3: - cachingOptions: - enableCache: true - componentRef: - name: comp-deletepvc-3 - dependentTasks: - - createpvc-2 - - run-final-eval-op - inputs: - parameters: - pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc-2 - taskInfo: - name: deletepvc-3 - git-clone-op: - cachingOptions: {} - componentRef: - name: 
comp-git-clone-op - dependentTasks: - - createpvc - inputs: - parameters: - repo_branch: + pipelinechannel--sdg_pipeline: + componentInputParameter: sdg_pipeline + pipelinechannel--sdg_repo_branch: componentInputParameter: sdg_repo_branch - repo_pr: + pipelinechannel--sdg_repo_pr: componentInputParameter: sdg_repo_pr - repo_url: + pipelinechannel--sdg_repo_url: componentInputParameter: sdg_repo_url + pipelinechannel--sdg_scale_factor: + componentInputParameter: sdg_scale_factor + pipelinechannel--train_only: + componentInputParameter: train_only taskInfo: - name: git-clone-op - huggingface-importer-op: - cachingOptions: {} + name: SDG Only + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--train_only'] == false + condition-3: componentRef: - name: comp-huggingface-importer-op - dependentTasks: - - createpvc-2 + name: comp-condition-3 inputs: parameters: - repo_name: + pipelinechannel--k8s_storage_class_name: + componentInputParameter: k8s_storage_class_name + pipelinechannel--sdg_base_model: componentInputParameter: sdg_base_model - taskInfo: - name: huggingface-importer-op - knowledge-processed-data-to-artifact-op: - cachingOptions: {} - componentRef: - name: comp-knowledge-processed-data-to-artifact-op - dependentTasks: - - createpvc - - data-processing-op - taskInfo: - name: knowledge-processed-data-to-artifact-op - kubectl-apply-op: - cachingOptions: {} - componentRef: - name: comp-kubectl-apply-op - dependentTasks: - - data-processing-op - - huggingface-importer-op - - pytorchjob-manifest-op - inputs: - parameters: - manifest: - taskOutputParameter: - outputParameterKey: manifest - producerTask: pytorchjob-manifest-op - taskInfo: - name: kubectl-apply-op - kubectl-apply-op-2: - cachingOptions: {} - componentRef: - name: comp-kubectl-apply-op-2 - dependentTasks: - - pytorchjob-manifest-op-2 - inputs: - parameters: - manifest: - taskOutputParameter: - outputParameterKey: manifest - producerTask: pytorchjob-manifest-op-2 - taskInfo: - name: kubectl-apply-op-2 - kubectl-wait-for-op: - cachingOptions: {} - componentRef: - name: comp-kubectl-wait-for-op - dependentTasks: - - kubectl-apply-op - - pytorchjob-manifest-op - inputs: - parameters: - condition: - runtimeValue: - constant: condition=Succeeded - kind: - runtimeValue: - constant: pytorchjobs - name: - taskOutputParameter: - outputParameterKey: name - producerTask: pytorchjob-manifest-op - taskInfo: - name: kubectl-wait-for-op - kubectl-wait-for-op-2: - cachingOptions: {} - componentRef: - name: comp-kubectl-wait-for-op-2 - dependentTasks: - - kubectl-apply-op-2 - - pytorchjob-manifest-op-2 - inputs: - parameters: - condition: - runtimeValue: - constant: condition=Succeeded - kind: - runtimeValue: - constant: pytorchjobs - name: - taskOutputParameter: - outputParameterKey: name - producerTask: pytorchjob-manifest-op-2 - taskInfo: - name: kubectl-wait-for-op-2 - list-models-in-directory-op: - cachingOptions: {} - componentRef: - name: comp-list-models-in-directory-op - dependentTasks: - - createpvc-3 - - kubectl-wait-for-op-2 - inputs: - parameters: - models_folder: - runtimeValue: - constant: /output/phase_2/model/hf_format - taskInfo: - name: list-models-in-directory-op - pvc-to-model-op: - cachingOptions: {} - componentRef: - name: comp-pvc-to-model-op - dependentTasks: - - createpvc-3 - - run-mt-bench-op - inputs: - parameters: - pvc_path: - runtimeValue: - constant: /output/phase_2/model/hf_format/candidate_model - taskInfo: - name: pvc-to-model-op - pvc-to-mt-bench-op: - cachingOptions: - enableCache: true - 
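
Most of the removals in this region are the old flat DAG: every task listed its PVC producers in `dependentTasks` and consumed the `createpvc*` names through `taskOutputParameter`, and the three `deletepvc` tasks tore the claims down once `run-final-eval-op` finished. With the kfp-kubernetes extension installed, that create/mount/delete lifecycle reads roughly as follows (a sketch under those assumptions; the `consume` component stands in for the last consumer of the volume):

from kfp import dsl, kubernetes

@dsl.component
def consume(pvc_path: str):
    print(pvc_path)

@dsl.pipeline()
def pvc_lifecycle_demo(k8s_storage_class_name: str = ""):
    pvc = kubernetes.CreatePVC(
        pvc_name_suffix="-output",
        access_modes=["ReadWriteMany"],
        size="100Gi",
        storage_class_name=k8s_storage_class_name,
    )
    consumer = consume(pvc_path="/output")
    kubernetes.mount_pvc(consumer, pvc_name=pvc.outputs["name"], mount_path="/output")
    # Delete the claim only after the last consumer has finished.
    cleanup = kubernetes.DeletePVC(pvc_name=pvc.outputs["name"])
    cleanup.after(consumer)
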
componentRef: - name: comp-pvc-to-mt-bench-op - dependentTasks: - - createpvc-3 - - run-mt-bench-op - inputs: - parameters: - pvc_path: - runtimeValue: - constant: /output/mt_bench_data.json - taskInfo: - name: pvc-to-mt-bench-op - pytorchjob-manifest-op: - cachingOptions: {} - componentRef: - name: comp-pytorchjob-manifest-op - dependentTasks: - - createpvc - - createpvc-2 - - createpvc-3 - inputs: - parameters: - effective_batch_size: + pipelinechannel--sdg_max_batch_len: + componentInputParameter: sdg_max_batch_len + pipelinechannel--sdg_only: + componentInputParameter: sdg_only + pipelinechannel--train_effective_batch_size_phase_1: componentInputParameter: train_effective_batch_size_phase_1 - input_pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc - learning_rate: - componentInputParameter: train_learning_rate_phase_1 - max_batch_len: - componentInputParameter: train_max_batch_len - model_pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc-2 - name_suffix: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc - nnodes: - componentInputParameter: train_nnodes - nproc_per_node: - componentInputParameter: train_nproc_per_node - num_epochs: - componentInputParameter: train_num_epochs_phase_1 - num_warmup_steps: - componentInputParameter: train_num_warmup_steps_phase_1 - output_pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc-3 - phase_num: - runtimeValue: - constant: 1.0 - save_samples: - componentInputParameter: train_save_samples - seed: - componentInputParameter: train_seed - taskInfo: - name: pytorchjob-manifest-op - pytorchjob-manifest-op-2: - cachingOptions: {} - componentRef: - name: comp-pytorchjob-manifest-op-2 - dependentTasks: - - createpvc - - createpvc-2 - - createpvc-3 - - kubectl-wait-for-op - inputs: - parameters: - effective_batch_size: + pipelinechannel--train_effective_batch_size_phase_2: componentInputParameter: train_effective_batch_size_phase_2 - input_pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc - learning_rate: + pipelinechannel--train_learning_rate_phase_1: + componentInputParameter: train_learning_rate_phase_1 + pipelinechannel--train_learning_rate_phase_2: componentInputParameter: train_learning_rate_phase_2 - max_batch_len: + pipelinechannel--train_max_batch_len: componentInputParameter: train_max_batch_len - model_pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc-2 - name_suffix: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc - nnodes: + pipelinechannel--train_nnodes: componentInputParameter: train_nnodes - nproc_per_node: + pipelinechannel--train_nproc_per_node: componentInputParameter: train_nproc_per_node - num_epochs: + pipelinechannel--train_num_epochs_phase_1: + componentInputParameter: train_num_epochs_phase_1 + pipelinechannel--train_num_epochs_phase_2: componentInputParameter: train_num_epochs_phase_2 - num_warmup_steps: + pipelinechannel--train_num_warmup_steps_phase_1: + componentInputParameter: train_num_warmup_steps_phase_1 + pipelinechannel--train_num_warmup_steps_phase_2: componentInputParameter: train_num_warmup_steps_phase_2 - output_pvc_name: - taskOutputParameter: - outputParameterKey: name - producerTask: createpvc-3 - phase_num: - runtimeValue: - constant: 2.0 - save_samples: + pipelinechannel--train_save_samples: componentInputParameter: train_save_samples - seed: + pipelinechannel--train_seed: 
componentInputParameter: train_seed taskInfo: - name: pytorchjob-manifest-op-2 - run-final-eval-op: - cachingOptions: - enableCache: true + name: Train Only + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--sdg_only'] == false + condition-4: componentRef: - name: comp-run-final-eval-op - dependentTasks: - - createpvc - - createpvc-2 - - createpvc-3 - - run-mt-bench-op + name: comp-condition-4 inputs: parameters: - base_branch: - componentInputParameter: sdg_repo_branch - base_model_dir: - runtimeValue: - constant: /model/ - batch_size: + pipelinechannel--final_eval_batch_size: componentInputParameter: final_eval_batch_size - candidate_branch: - componentInputParameter: sdg_repo_branch - candidate_model: - runtimeValue: - constant: /output/phase_2/model/hf_format/candidate_model - few_shots: + pipelinechannel--final_eval_few_shots: componentInputParameter: final_eval_few_shots - max_workers: + pipelinechannel--final_eval_max_workers: componentInputParameter: final_eval_max_workers - merge_system_user_message: + pipelinechannel--final_eval_merge_system_user_message: componentInputParameter: final_eval_merge_system_user_message - taskInfo: - name: run-final-eval-op - run-mt-bench-op: - cachingOptions: {} - componentRef: - name: comp-run-mt-bench-op - dependentTasks: - - createpvc-3 - - list-models-in-directory-op - inputs: - parameters: - max_workers: + pipelinechannel--k8s_storage_class_name: + componentInputParameter: k8s_storage_class_name + pipelinechannel--mt_bench_max_workers: componentInputParameter: mt_bench_max_workers - merge_system_user_message: + pipelinechannel--mt_bench_merge_system_user_message: componentInputParameter: mt_bench_merge_system_user_message - models_list: - taskOutputParameter: - outputParameterKey: Output - producerTask: list-models-in-directory-op - models_path_prefix: - runtimeValue: - constant: /output/phase_2/model/hf_format - taskInfo: - name: run-mt-bench-op - sdg-op: - cachingOptions: {} - componentRef: - name: comp-sdg-op - dependentTasks: - - createpvc - - git-clone-op - inputs: - parameters: - num_instructions_to_generate: - componentInputParameter: sdg_scale_factor - pipeline: + pipelinechannel--sdg_base_model: + componentInputParameter: sdg_base_model + pipelinechannel--sdg_max_batch_len: + componentInputParameter: sdg_max_batch_len + pipelinechannel--sdg_pipeline: componentInputParameter: sdg_pipeline - repo_branch: + pipelinechannel--sdg_repo_branch: componentInputParameter: sdg_repo_branch - repo_pr: + pipelinechannel--sdg_repo_pr: componentInputParameter: sdg_repo_pr + pipelinechannel--sdg_repo_url: + componentInputParameter: sdg_repo_url + pipelinechannel--sdg_scale_factor: + componentInputParameter: sdg_scale_factor + pipelinechannel--train_effective_batch_size_phase_1: + componentInputParameter: train_effective_batch_size_phase_1 + pipelinechannel--train_effective_batch_size_phase_2: + componentInputParameter: train_effective_batch_size_phase_2 + pipelinechannel--train_learning_rate_phase_1: + componentInputParameter: train_learning_rate_phase_1 + pipelinechannel--train_learning_rate_phase_2: + componentInputParameter: train_learning_rate_phase_2 + pipelinechannel--train_max_batch_len: + componentInputParameter: train_max_batch_len + pipelinechannel--train_nnodes: + componentInputParameter: train_nnodes + pipelinechannel--train_nproc_per_node: + componentInputParameter: train_nproc_per_node + pipelinechannel--train_num_epochs_phase_1: + componentInputParameter: train_num_epochs_phase_1 + 
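
These `condition-N` groups are the core of the change: each wraps one stage's sub-DAG and guards it with a `triggerPolicy` on the new `sdg_only`/`train_only` booleans (`Skip Condition` fires when `train_only` is true; `SDG Only` and `All Stages` when it is false; `Train Only` when `sdg_only` is false). Groups like these are authored with `dsl.If` in the KFP v2 DSL; a sketch mirroring the compiled trigger policies, with a placeholder component standing in for each stage's real tasks:

from kfp import dsl

@dsl.component
def stage(msg: str):
    print(msg)

@dsl.pipeline()
def conditional_demo(sdg_only: bool = False, train_only: bool = False):
    # Explicit == comparisons are required: they build pipeline conditions
    # rather than evaluating at authoring time.
    with dsl.If(train_only == True, name="Skip Condition"):
        stage(msg="fetch existing training data instead of running SDG")
    with dsl.If(train_only == False, name="SDG Only"):
        stage(msg="git clone + sdg_op + artifact uploads")
    with dsl.If(sdg_only == False, name="Train Only"):
        stage(msg="model import + data processing + both training phases")
    with dsl.If(train_only == False, name="All Stages"):
        stage(msg="mt-bench + final eval + cleanup")

Any pipeline parameter referenced inside such a group surfaces on the compiled condition component as the `pipelinechannel--*` inputs seen above.
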
pipelinechannel--train_num_epochs_phase_2: + componentInputParameter: train_num_epochs_phase_2 + pipelinechannel--train_num_warmup_steps_phase_1: + componentInputParameter: train_num_warmup_steps_phase_1 + pipelinechannel--train_num_warmup_steps_phase_2: + componentInputParameter: train_num_warmup_steps_phase_2 + pipelinechannel--train_only: + componentInputParameter: train_only + pipelinechannel--train_save_samples: + componentInputParameter: train_save_samples + pipelinechannel--train_seed: + componentInputParameter: train_seed taskInfo: - name: sdg-op - sdg-to-artifact-op: - cachingOptions: - enableCache: true - componentRef: - name: comp-sdg-to-artifact-op - dependentTasks: - - createpvc - - git-clone-op - - sdg-op - taskInfo: - name: sdg-to-artifact-op - skills-processed-data-to-artifact-op: - cachingOptions: {} - componentRef: - name: comp-skills-processed-data-to-artifact-op - dependentTasks: - - createpvc - - data-processing-op - taskInfo: - name: skills-processed-data-to-artifact-op - taxonomy-to-artifact-op: - cachingOptions: - enableCache: true - componentRef: - name: comp-taxonomy-to-artifact-op - dependentTasks: - - createpvc - - git-clone-op - - sdg-op - taskInfo: - name: taxonomy-to-artifact-op + name: All Stages + triggerPolicy: + condition: inputs.parameter_values['pipelinechannel--train_only'] == false inputDefinitions: parameters: final_eval_batch_size: @@ -2145,6 +4575,10 @@ root: be handled in a single step. isOptional: true parameterType: NUMBER_INTEGER + sdg_only: + defaultValue: false + isOptional: true + parameterType: BOOLEAN sdg_pipeline: defaultValue: simple description: 'SDG parameter. Data generation pipeline to use. Available: ''simple'', @@ -2241,6 +4675,10 @@ root: and linearly climb up to train_learning_rate. isOptional: true parameterType: NUMBER_INTEGER + train_only: + defaultValue: false + isOptional: true + parameterType: BOOLEAN train_save_samples: defaultValue: 0.0 description: Training parameter. 
Number of samples the model should see before @@ -2253,7 +4691,7 @@ root: isOptional: true parameterType: NUMBER_INTEGER schemaVersion: 2.1.0 -sdkVersion: kfp-2.10.1 +sdkVersion: kfp-2.9.0 --- platforms: kubernetes: @@ -2266,53 +4704,95 @@ platforms: - mountPath: /model taskOutputParameter: outputParameterKey: name - producerTask: createpvc-2 + producerTask: createpvc-9 + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-7 + exec-data-processing-op-2: + imagePullSecret: + - secretName: redhat-et-ilab-botty-pull-secret + pvcMount: + - mountPath: /model + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-12 - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-10 exec-git-clone-op: pvcMount: - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-4 + exec-git-clone-op-2: + pvcMount: + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 exec-huggingface-importer-op: pvcMount: - mountPath: /model taskOutputParameter: outputParameterKey: name - producerTask: createpvc-2 + producerTask: createpvc-9 + exec-huggingface-importer-op-2: + pvcMount: + - mountPath: /model + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-12 exec-knowledge-processed-data-to-artifact-op: pvcMount: - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-7 + exec-knowledge-processed-data-to-artifact-op-2: + pvcMount: + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 exec-list-models-in-directory-op: pvcMount: - mountPath: /output taskOutputParameter: outputParameterKey: name - producerTask: createpvc-3 + producerTask: createpvc-8 + exec-list-models-in-directory-op-2: + pvcMount: + - mountPath: /output + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-11 exec-pvc-to-model-op: pvcMount: - mountPath: /output taskOutputParameter: outputParameterKey: name - producerTask: createpvc-3 + producerTask: createpvc-11 exec-pvc-to-mt-bench-op: pvcMount: - mountPath: /output taskOutputParameter: outputParameterKey: name - producerTask: createpvc-3 + producerTask: createpvc-11 exec-pytorchjob-manifest-op-2: pvcMount: - mountPath: /output taskOutputParameter: outputParameterKey: name - producerTask: createpvc-3 + producerTask: createpvc-8 + exec-pytorchjob-manifest-op-4: + pvcMount: + - mountPath: /output + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-11 exec-run-final-eval-op: configMapAsEnv: - configMapName: judge-server @@ -2327,15 +4807,15 @@ platforms: - mountPath: /output taskOutputParameter: outputParameterKey: name - producerTask: createpvc-3 + producerTask: createpvc-11 - mountPath: /input taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-10 - mountPath: /model taskOutputParameter: outputParameterKey: name - producerTask: createpvc-2 + producerTask: createpvc-12 secretAsEnv: - keyToEnv: - envVar: JUDGE_API_KEY @@ -2355,7 +4835,7 @@ platforms: - mountPath: /output taskOutputParameter: outputParameterKey: name - producerTask: createpvc-3 + producerTask: createpvc-11 secretAsEnv: - keyToEnv: - envVar: JUDGE_API_KEY @@ -2375,7 +4855,27 @@ platforms: - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: 
createpvc-4 + secretAsEnv: + - keyToEnv: + - envVar: api_key + secretKey: api_key + secretName: teacher-server + exec-sdg-op-2: + configMapAsEnv: + - configMapName: teacher-server + keyToEnv: + - configMapKey: endpoint + envVar: endpoint + - configMapKey: model + envVar: model + imagePullSecret: + - secretName: redhat-et-ilab-botty-pull-secret + pvcMount: + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 secretAsEnv: - keyToEnv: - envVar: api_key @@ -2386,16 +4886,34 @@ platforms: - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-4 + exec-sdg-to-artifact-op-2: + pvcMount: + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 exec-skills-processed-data-to-artifact-op: pvcMount: - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-7 + exec-skills-processed-data-to-artifact-op-2: + pvcMount: + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 exec-taxonomy-to-artifact-op: pvcMount: - mountPath: /data taskOutputParameter: outputParameterKey: name - producerTask: createpvc + producerTask: createpvc-4 + exec-taxonomy-to-artifact-op-2: + pvcMount: + - mountPath: /data + taskOutputParameter: + outputParameterKey: name + producerTask: createpvc-10 diff --git a/sdg/__init__.py b/sdg/__init__.py index 91a0022b..8ab3b66f 100644 --- a/sdg/__init__.py +++ b/sdg/__init__.py @@ -1,5 +1,6 @@ from . import faked from .components import ( + get_training_data, git_clone_op, sdg_op, sdg_to_artifact_op, @@ -11,5 +12,6 @@ "sdg_op", "taxonomy_to_artifact_op", "sdg_to_artifact_op", + "get_training_data", "faked", ] diff --git a/sdg/components.py b/sdg/components.py index 49a36459..0f8e67ab 100644 --- a/sdg/components.py +++ b/sdg/components.py @@ -27,6 +27,13 @@ def git_clone_op( ) +@dsl.component(base_image=TOOLBOX_IMAGE) +def get_training_data(): + import json + + pass + + @dsl.component(base_image=RHELAI_IMAGE, install_kfp_package=False) def sdg_op( num_instructions_to_generate: int,
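
The patch ends with `get_training_data` as an empty stub (the `import json` is unused and the body is `pass`), matching the `wip:` subject line. Purely as an illustration of where it seems headed, given that the `train_only` path has no SDG stage to populate the data PVC, one hypothetical shape in the style of sdg/components.py:

@dsl.component(base_image=TOOLBOX_IMAGE)
def get_training_data(data_url: str, sdg_path: str = "/data/sdg"):
    # Hypothetical sketch only -- the parameter names and behavior are
    # illustrative, not part of this patch. Pulls pre-generated SDG output
    # onto the shared PVC so training can run when the SDG stage is skipped.
    import os
    import urllib.request

    os.makedirs(sdg_path, exist_ok=True)
    urllib.request.urlretrieve(data_url, os.path.join(sdg_path, "data.jsonl"))
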