[WIP] feat: update to use azure api #340

Merged 5 commits on Oct 27, 2024
21 changes: 14 additions & 7 deletions lmms_eval/api/task.py
@@ -891,6 +891,7 @@ def _download_from_youtube(path):
accelerator = Accelerator()
if accelerator.is_main_process:
    dataset_kwargs.pop("From_YouTube")
    assert "load_from_disk" not in dataset_kwargs, "load_from_disk must not be True when From_YouTube is True"
    self.all_dataset = datasets.load_dataset(
        path=self.DATASET_PATH,
        name=self.DATASET_NAME,
@@ -1033,13 +1034,19 @@ def concat_tar_parts(tar_parts, output_tar):
if "create_link" in dataset_kwargs:
dataset_kwargs.pop("create_link")

self.dataset = datasets.load_dataset(
path=self.DATASET_PATH,
name=self.DATASET_NAME,
download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
download_config=download_config,
**dataset_kwargs if dataset_kwargs is not None else {},
)
if "load_from_disk" in dataset_kwargs and dataset_kwargs["load_from_disk"]:
dataset_kwargs.pop("load_from_disk")
# using local task in offline environment, need to process the online dataset into local format via
# `ds = load_datasets("lmms-lab/MMMU")`
self.dataset = datasets.load_from_disk(path=self.DATASET_PATH, name=self.DATASET_NAME)
else:
self.dataset = datasets.load_dataset(
path=self.DATASET_PATH,
name=self.DATASET_NAME,
download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
download_config=download_config,
**dataset_kwargs if dataset_kwargs is not None else {},
)

if self.config.process_docs is not None:
for split in self.dataset:
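The new `load_from_disk` branch assumes the dataset has already been materialized on local disk. A minimal sketch of that offline workflow, assuming a writable local directory (the dataset name and path are illustrative, not part of this PR):

import datasets

LOCAL_PATH = "/data/mmmu_local"  # hypothetical directory

# On a machine with network access: download once and save in Arrow format.
ds = datasets.load_dataset("lmms-lab/MMMU")
ds.save_to_disk(LOCAL_PATH)

# On the offline machine: point DATASET_PATH at the saved directory and set
# `load_from_disk: true` in the task's dataset_kwargs so this branch runs.
ds = datasets.load_from_disk(LOCAL_PATH)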
3 changes: 3 additions & 0 deletions lmms_eval/tasks/llava-in-the-wild/utils.py
@@ -65,6 +65,9 @@ def get_eval(content: str, max_tokens: int, retries: int = 5):
"max_tokens": max_tokens,
}

if API_TYPE == "azure":
payload.pop("model")

for attempt in range(retries):
try:
response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
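Dropping `model` from the payload matches how Azure OpenAI routes requests: the deployment segment of the endpoint URL selects the model, so the JSON body carries no `model` field. A hedged sketch of such a request; the resource, deployment, and API version below are placeholders, and note that the `AZURE_ENDPOINT` fallback used elsewhere in this PR points at a Cognitive Services token endpoint, so in practice the variable needs to be set to a chat-completions URL of roughly this shape:

import os

import requests

# Hypothetical Azure OpenAI chat-completions endpoint; the deployment name
# in the URL picks the model, so the body omits the "model" field.
API_URL = os.getenv(
    "AZURE_ENDPOINT",
    "https://YOUR_RESOURCE.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT/chat/completions?api-version=2024-02-01",
)
headers = {
    "api-key": os.getenv("AZURE_API_KEY", "YOUR_API_KEY"),
    "Content-Type": "application/json",
}
payload = {
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 16,
}

response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
print(response.json())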
20 changes: 18 additions & 2 deletions lmms_eval/tasks/mathverse/mathverse_evals.py
@@ -72,11 +72,27 @@


class MathVerseEvaluator:
    API_URL = "https://api.openai.com/v1/chat/completions"
    API_TYPE = os.getenv("API_TYPE", "openai")

    def __init__(self, api_key, gpt_model="gpt-3.5-turbo"):
    if API_TYPE == "openai":
        API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
        API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
    elif API_TYPE == "azure":
        API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
        API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
        headers = {
            "api-key": API_KEY,
            "Content-Type": "application/json",
        }

    def __init__(self, api_key, gpt_model="gpt-3.5-turbo", quick_extract=False):
        self.api_key = api_key
        self.gpt_model = gpt_model
        self.quick_extract = quick_extract

    def _post_request(self, payload):
        headers = {
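Because `API_TYPE` and the matching URL and headers are resolved while the class body executes, the back end has to be chosen before the module is imported. A hypothetical usage sketch (the endpoint and key values are placeholders):

import os

# Assumed usage: select the Azure back end before the evaluator module loads.
os.environ["API_TYPE"] = "azure"
os.environ["AZURE_ENDPOINT"] = "https://YOUR_RESOURCE.openai.azure.com/openai/deployments/YOUR_DEPLOYMENT/chat/completions?api-version=2024-02-01"
os.environ["AZURE_API_KEY"] = "YOUR_API_KEY"

from lmms_eval.tasks.mathverse.mathverse_evals import MathVerseEvaluator

evaluator = MathVerseEvaluator(api_key=os.environ["AZURE_API_KEY"])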
21 changes: 20 additions & 1 deletion lmms_eval/tasks/mathvista/mathvista_evals.py
@@ -1,3 +1,4 @@
import os
import re
import time

@@ -145,7 +146,22 @@ def is_valid_triangle(a, b, perimeter):


class MathVistaEvaluator:
    API_URL = "https://api.openai.com/v1/chat/completions"
    API_TYPE = os.getenv("API_TYPE", "openai")

    if API_TYPE == "openai":
        API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
        API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
    elif API_TYPE == "azure":
        API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
        API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
        headers = {
            "api-key": API_KEY,
            "Content-Type": "application/json",
        }

    def __init__(self, api_key, gpt_model="gpt-3.5-turbo", quick_extract=False):
        self.api_key = api_key
@@ -167,6 +183,9 @@ def get_chat_response(self, prompt, temperature=0, max_tokens=256, n=1, patience
]
payload = {"model": self.gpt_model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "n": n}

if self.API_TYPE == "azure":
    payload.pop("model")

while patience > 0:
    patience -= 1
    try:
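The `patience` counter bounds how many times a failed request is retried. A stripped-down sketch of the pattern; the function name and the fixed pause between attempts are illustrative additions, not code from this PR:

import time

import requests


def post_with_patience(url, headers, payload, patience=5, wait=3):
    # Illustrative retry wrapper: bounded attempts with a fixed pause.
    while patience > 0:
        patience -= 1
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            return response.json()
        except requests.RequestException:
            if patience > 0:
                time.sleep(wait)  # brief pause before the next attempt
    return None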
19 changes: 18 additions & 1 deletion lmms_eval/tasks/mathvista/utils.py
@@ -19,7 +19,24 @@

config = yaml.safe_load("".join(safe_data))

mathvista_evaluator = MathVistaEvaluator(api_key=os.getenv("OPENAI_API_KEY", "YOUR_API_KEY"), gpt_model=config["metadata"]["gpt_eval_model_name"])

API_TYPE = os.getenv("API_TYPE", "openai")
if API_TYPE == "openai":
    API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
    API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
elif API_TYPE == "azure":
    API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
    API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
    headers = {
        "api-key": API_KEY,
        "Content-Type": "application/json",
    }

mathvista_evaluator = MathVistaEvaluator(api_key=API_KEY, gpt_model=config["metadata"]["gpt_eval_model_name"])


def mathvista_doc_to_visual(doc):
22 changes: 20 additions & 2 deletions lmms_eval/tasks/mmvet/utils.py
@@ -17,8 +17,23 @@

config = yaml.safe_load("".join(safe_data))

API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
API_TYPE = os.getenv("API_TYPE", "openai")

if API_TYPE == "openai":
API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
headers = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}
elif API_TYPE == "azure":
API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
headers = {
"api-key": API_KEY,
"Content-Type": "application/json",
}

GPT_EVAL_MODEL_NAME = config["metadata"]["gpt_eval_model_name"]
MM_VET_PROMPT = """Compare the ground truth and prediction from AI models, to give a correctness score for the prediction. <AND> in the ground truth means it is totally right only when all elements in the ground truth are present in the prediction, and <OR> means it is totally right when any one element in the ground truth is present in the prediction. The correctness score is 0.0 (totally wrong), 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, or 1.0 (totally right). Just complete the last space of the correctness score.
gpt_query_prompt | Ground truth | Prediction | Correctness
@@ -50,6 +65,9 @@ def get_chat_response(prompt, model=GPT_EVAL_MODEL_NAME, temperature=0.0, max_to
"max_tokens": max_tokens,
}

if API_TYPE == "azure":
payload.pop("model")

while patience > 0:
patience -= 1
try:
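The MM-Vet prompt asks the judge to complete the line with a score from 0.0 to 1.0, so the caller still has to extract that number from the reply. A hedged sketch of one way to do it (the regex and the zero fallback are assumptions, not code from this PR):

import re


def parse_score(reply: str) -> float:
    # Illustrative: take the last decimal in [0.0, 1.0] found in the reply.
    matches = re.findall(r"\b(?:0(?:\.\d+)?|1\.0)\b", reply)
    return float(matches[-1]) if matches else 0.0  # assumed fallback


print(parse_score("prompt | ground truth | prediction | 0.8"))  # -> 0.8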