Skip to content

Commit

Permalink
update settings of the job
Browse files Browse the repository at this point in the history
Signed-off-by: helenxie-bit <helenxiehz@gmail.com>
  • Loading branch information
helenxie-bit committed Aug 9, 2024
1 parent daa0054 commit 8d4af90
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions sdk/python/test_e2e/test_e2e_train_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@


def test_train_api(job_namespace):
- num_workers = 1
+ num_workers = 4

# Use test case from fine-tuning API tutorial
# https://www.kubeflow.org/docs/components/training/user-guides/fine-tuning/
Expand Down Expand Up @@ -74,10 +74,10 @@ def test_train_api(job_namespace):
),
),
num_workers=num_workers, # nodes parameter for torchrun command.
- num_procs_per_worker=1, # nproc-per-node parameter for torchrun command.
+ num_procs_per_worker=2, # nproc-per-node parameter for torchrun command.
resources_per_worker={
- "gpu": 0,
- "cpu": 2,
+ "gpu": 2,
+ "cpu": 5,
"memory": "10G",
},
)
Expand All @@ -87,7 +87,7 @@ def test_train_api(job_namespace):

try:
utils.verify_job_e2e(
- TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=60 * 30
+ TRAINING_CLIENT, JOB_NAME, job_namespace, wait_timeout=60 * 60
)
logging.info(f"Training job {JOB_NAME} is succeded.")
except Exception as e:
Expand Down

0 comments on commit 8d4af90

Please sign in to comment.