Format entire codebase
Command used to format the code:

```
ruff format .
```
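
To keep the tree in this state after the commit, the same formatter can be run in verification mode — a minimal sketch, assuming a ruff release whose `format` subcommand provides the `--check` and `--diff` flags:

```
# Exit non-zero if any file would be reformatted, without modifying it
ruff format --check .

# Print the changes ruff would make, useful when the check fails
ruff format --diff .
```

Wiring the `--check` variant into CI prevents later changes from reintroducing the single-quoted strings and over-long lines that this commit removes.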

Signed-off-by: Sébastien Han <seb@redhat.com>
leseb committed Oct 3, 2024
1 parent 8748546 commit f3f946b
Showing 13 changed files with 426 additions and 340 deletions.
3 changes: 1 addition & 2 deletions eval/mmlu/__init__.py
@@ -1,5 +1,4 @@
from .components import run_mmlu_op, load_mmlu_results_op
#from . import faked
# from . import faked

__all__ = ["run_mmlu_op", "load_mmlu_results_op"]

33 changes: 23 additions & 10 deletions eval/mmlu/components.py
@@ -6,6 +6,7 @@

EVAL_IMAGE = "quay.io/sallyom/instructlab-ocp:eval"


@component(base_image=EVAL_IMAGE)
def run_mmlu_op(
mmlu_output: Output[Artifact],
@@ -16,21 +17,27 @@ def run_mmlu_op(
batch_size: int,
device: str,
models_list: List[str],
) -> NamedTuple('outputs', best_model=str, best_score=float):
) -> NamedTuple("outputs", best_model=str, best_score=float):
import json
import os
import torch
from instructlab.eval.mmlu import MMLUEvaluator, MMLU_TASKS

mmlu_tasks = mmlu_tasks_list.split(',') if mmlu_tasks_list else MMLU_TASKS
mmlu_tasks = mmlu_tasks_list.split(",") if mmlu_tasks_list else MMLU_TASKS

# Device setup and debug
gpu_available = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(torch.cuda.current_device()) if gpu_available else "No GPU available"
gpu_name = (
torch.cuda.get_device_name(torch.cuda.current_device())
if gpu_available
else "No GPU available"
)

print(f"GPU Available: {gpu_available}, Using: {gpu_name}")

effective_device = device if device is not None else ("cuda" if gpu_available else "cpu")
effective_device = (
device if device is not None else ("cuda" if gpu_available else "cpu")
)
print(f"Running on device: {effective_device}")

scores = {}
@@ -53,33 +60,39 @@ def run_mmlu_op(

mmlu_score, individual_scores = evaluator.run()
average_score = round(mmlu_score, 2)
print(f"Model {model_name} is stored at: {model_path} with AVERAGE_SCORE: {average_score}")
print(
f"Model {model_name} is stored at: {model_path} with AVERAGE_SCORE: {average_score}"
)

mmlu_data = {
"report_title": "KNOWLEDGE EVALUATION REPORT",
"model": model_name,
"average_score": average_score,
"number_of_tasks": len(individual_scores),
"individual_scores": [{task: round(score['score'], 2)} for task, score in individual_scores.items()]
"individual_scores": [
{task: round(score["score"], 2)}
for task, score in individual_scores.items()
],
}

all_mmlu_data.append(mmlu_data)
scores[model_path] = average_score

with open(mmlu_output.path, 'w') as f:
with open(mmlu_output.path, "w") as f:
json.dump(all_mmlu_data, f, indent=4)
outputs = NamedTuple('outputs', best_model=str, best_score=float)
outputs = NamedTuple("outputs", best_model=str, best_score=float)
best_model = max(scores, key=scores.get)
best_score = scores[best_model]
return outputs(best_model=best_model, best_score=best_score)


@component(base_image=PYTHON_IMAGE)
def load_mmlu_results_op(mmlu_output: Input[Artifact]) -> list:
import json

mmlu_score_list = []
with open(mmlu_output.path, 'r') as f:
mmlu_score_list = json.load(f)
with open(mmlu_output.path, "r") as f:
mmlu_score_list = json.load(f)

print("MMLU Evaluation Data:")
for mmlu_score in mmlu_score_list:
3 changes: 1 addition & 2 deletions eval/mt_bench/__init__.py
@@ -1,5 +1,4 @@
from .components import run_mt_bench_op, load_mt_bench_results_op
#from . import faked
# from . import faked

__all__ = ["run_mt_bench_op", "load_mt_bench_results_op"]

94 changes: 60 additions & 34 deletions eval/mt_bench/components.py
@@ -6,6 +6,7 @@

EVAL_IMAGE = "quay.io/sallyom/instructlab-ocp:eval"


@component(base_image=EVAL_IMAGE, packages_to_install=["vllm"])
def run_mt_bench_op(
models_path_prefix: str,
@@ -17,10 +18,10 @@ def run_mt_bench_op(
# https://github.com/instructlab/eval/blob/main/src/instructlab/eval/mt_bench.py#L36
max_workers: str = "auto",
device: str = None,
) -> NamedTuple('outputs', best_model=str, best_score=float):


def launch_vllm_server_background(model_path: str, gpu_count: int, retries: int = 60, delay: int = 5):
) -> NamedTuple("outputs", best_model=str, best_score=float):
def launch_vllm_server_background(
model_path: str, gpu_count: int, retries: int = 60, delay: int = 5
):
import subprocess
import sys
import time
@@ -29,15 +30,20 @@ def launch_vllm_server_background(model_path: str, gpu_count: int, retries: int
if gpu_count > 0:
command = [
sys.executable,
"-m", "vllm.entrypoints.openai.api_server",
"--model", model_path,
"--tensor-parallel-size", str(gpu_count),
"-m",
"vllm.entrypoints.openai.api_server",
"--model",
model_path,
"--tensor-parallel-size",
str(gpu_count),
]
else:
command = [
sys.executable,
"-m", "vllm.entrypoints.openai.api_server",
"--model", model_path,
"-m",
"vllm.entrypoints.openai.api_server",
"--model",
model_path,
]

subprocess.Popen(args=command)
@@ -54,10 +60,14 @@ def launch_vllm_server_background(model_path: str, gpu_count: int, retries: int
except requests.ConnectionError:
pass

print(f"Server not available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})...")
print(
f"Server not available yet, retrying in {delay} seconds (Attempt {attempt + 1}/{retries})..."
)
time.sleep(delay)

raise RuntimeError(f"Failed to start vLLM server at {server_url} after {retries} retries.")
raise RuntimeError(
f"Failed to start vLLM server at {server_url} after {retries} retries."
)

# This seems like excessive effort to stop the vllm process, but merely saving & killing the pid doesn't work
# Also, the base image does not include `pkill` cmd, so can't pkill -f vllm.entrypoints.openai.api_server either
@@ -67,21 +77,30 @@ def stop_vllm_server_by_name():
for process in psutil.process_iter(attrs=["pid", "name", "cmdline"]):
cmdline = process.info.get("cmdline")
if cmdline and "vllm.entrypoints.openai.api_server" in cmdline:
print(f"Found vLLM server process with PID: {process.info['pid']}, terminating...")
print(
f"Found vLLM server process with PID: {process.info['pid']}, terminating..."
)
try:
process.terminate() # Try graceful termination
process.wait(timeout=5) # Wait a bit for it to terminate
if process.is_running():
print(f"Forcefully killing vLLM server process with PID: {process.info['pid']}")
print(
f"Forcefully killing vLLM server process with PID: {process.info['pid']}"
)
process.kill() # Force kill if it's still running
print(f"Successfully stopped vLLM server with PID: {process.info['pid']}")
print(
f"Successfully stopped vLLM server with PID: {process.info['pid']}"
)
except psutil.NoSuchProcess:
print(f"Process with PID {process.info['pid']} no longer exists.")
except psutil.AccessDenied:
print(f"Access denied when trying to terminate process with PID {process.info['pid']}.")
print(
f"Access denied when trying to terminate process with PID {process.info['pid']}."
)
except Exception as e:
print(f"Failed to terminate process with PID {process.info['pid']}. Error: {e}")

print(
f"Failed to terminate process with PID {process.info['pid']}. Error: {e}"
)

import json
import torch
@@ -93,7 +112,11 @@ def stop_vllm_server_by_name():
candidate_server_url = "http://localhost:8000/v1"

gpu_available = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(torch.cuda.current_device()) if gpu_available else "No GPU available"
gpu_name = (
torch.cuda.get_device_name(torch.cuda.current_device())
if gpu_available
else "No GPU available"
)
gpu_count = torch.cuda.device_count() if gpu_available else 0

print(f"GPU Available: {gpu_available}, {gpu_name}")
@@ -108,12 +131,12 @@ def stop_vllm_server_by_name():

# TODO: Using evaluator results in connection errors, need to determine why.
# For now, using mt_bench_answers.generate_answers & mt_bench_judgment.generate_judgment
#evaluator = MTBenchEvaluator(
# evaluator = MTBenchEvaluator(
# model_name=candidate_model_name,
# judge_model_name=judge_model_name,
# max_workers=max_workers,
# merge_system_user_message=merge_system_user_message
#)
# )

judge_api_key = os.getenv("JUDGE_API_KEY", "")
judge_model_name = os.getenv("JUDGE_NAME")
@@ -125,7 +148,7 @@ def stop_vllm_server_by_name():
for model_name in models_list:
print(f"Serving candidate model: {model_name}")
model_path = f"{models_path_prefix}/{model_name}"

# Launch the vLLM server and wait until it is ready
launch_vllm_server_background(model_path, gpu_count)

@@ -135,18 +158,20 @@ def stop_vllm_server_by_name():
model_name=model_path,
model_api_base=candidate_server_url,
output_dir="/tmp/eval_output",
max_workers=max_workers
max_workers=max_workers,
)

print("Judging answers...")
overall_score, qa_pairs, turn_scores, error_rate = mt_bench_judgment.generate_judgment(
model_name=model_path,
judge_model_name=judge_model_name,
model_api_base=judge_endpoint,
api_key=judge_api_key,
output_dir="/tmp/eval_output",
max_workers=max_workers,
merge_system_user_message=merge_system_user_message
overall_score, qa_pairs, turn_scores, error_rate = (
mt_bench_judgment.generate_judgment(
model_name=model_path,
judge_model_name=judge_model_name,
model_api_base=judge_endpoint,
api_key=judge_api_key,
output_dir="/tmp/eval_output",
max_workers=max_workers,
merge_system_user_message=merge_system_user_message,
)
)

stop_vllm_server_by_name()
@@ -164,21 +189,22 @@ def stop_vllm_server_by_name():
all_mt_bench_data.append(mt_bench_data)
scores[model_path] = overall_score

with open(mt_bench_output.path, 'w') as f:
with open(mt_bench_output.path, "w") as f:
json.dump(all_mt_bench_data, f, indent=4)

outputs = NamedTuple('outputs', best_model=str, best_score=float)
outputs = NamedTuple("outputs", best_model=str, best_score=float)
best_model = max(scores, key=scores.get)
best_score = scores[best_model]
return outputs(best_model=best_model, best_score=best_score)


@component(base_image=PYTHON_IMAGE)
def load_mt_bench_results_op(mt_bench_output: Input[Artifact]) -> list:
import json

mt_bench_score_list = []
with open(mt_bench_output.path, 'r') as f:
mt_bench_score_list = json.load(f)
with open(mt_bench_output.path, "r") as f:
mt_bench_score_list = json.load(f)

print("MT_Bench Evaluation Data:")
for mt_bench_score in mt_bench_score_list:
