-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 21 changed files with 548 additions and 363 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
from evalforge.forge import EvalForge | ||
from evalforge.alignment import calculate_alignment_metrics, format_alignment_metrics | ||
from evalforge.alignment import calculate_alignment_metrics, format_alignment_metrics | ||
|
||
|
||
__all__ = ["EvalForge", "calculate_alignment_metrics", "format_alignment_metrics"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,80 +1,56 @@ | ||
import asyncio | ||
from typing import Optional | ||
import simple_parsing | ||
from simple_parsing import Serializable | ||
from dataclasses import dataclass, Field | ||
from dataclasses import dataclass | ||
import sys | ||
|
||
import weave | ||
|
||
from evalforge.forge import EvalForge | ||
from evalforge.utils import logger | ||
from evalforge.data_utils import load_data, DataPoint | ||
from evalforge.data_utils import load_data | ||
|
||
train_ds_formatted = [ | ||
DataPoint( | ||
input_data={"text": "1+1="}, | ||
output_data={"text": "2"}, | ||
annotation=1, | ||
note="Correct summation", | ||
), | ||
DataPoint( | ||
input_data={"text": "1+1="}, | ||
output_data={"text": "3"}, | ||
annotation=0, | ||
note="Incorrect summation", | ||
), | ||
DataPoint( | ||
input_data={"text": "What is the square root of 16?"}, | ||
output_data={"text": "4"}, | ||
annotation=1, | ||
note="Correct square root", | ||
), | ||
] | ||
MINI_DATASET_PATH = "data/mini_data.jsonl" | ||
|
||
eval_ds_formatted = [ | ||
DataPoint( | ||
input_data={"text": "What is the square root of 16?"}, | ||
output_data={"text": "4"}, | ||
annotation=1, | ||
note="Correct square root", | ||
), | ||
DataPoint( | ||
input_data={"text": "What is the square root of 16?"}, | ||
output_data={"text": "3"}, | ||
annotation=0, | ||
note="Incorrect square root", | ||
), | ||
] | ||
|
||
@dataclass | ||
class Args(Serializable): | ||
data: str = "mini"# "Path to training data" | ||
batch_size: int = 1 # "Batch size" | ||
num_criteria_to_generate: int = 1 # "Number of criteria to generate" | ||
llm_model: str = "gpt-4o" # "LLM model to use" | ||
data: str = "mini" # "Path to training data" | ||
batch_size: int = 1 # "Batch size" | ||
num_criteria_to_generate: int = 1 # "Number of criteria to generate" | ||
llm_model: str = "gpt-4o" # "LLM model to use" | ||
weave_project: Optional[str] = None # "Weave project to use" | ||
|
||
|
||
def forge(): | ||
logger.rule("EvalForge CLI") | ||
try: | ||
args = simple_parsing.parse(Args) | ||
|
||
# Load the data | ||
# Log into Weave | ||
if args.weave_project: | ||
weave.init(args.weave_project) | ||
|
||
# Load the data | ||
if args.data == "mini": | ||
logger.info(f"Running dummy data") | ||
train_data = train_ds_formatted | ||
logger.info("Running dummy data") | ||
train_data = load_data(MINI_DATASET_PATH) | ||
else: | ||
logger.info(f"Loading data from {args.data}") | ||
train_data = load_data(args.data) | ||
|
||
forger = EvalForge( | ||
batch_size=args.batch_size, | ||
num_criteria_to_generate=args.num_criteria_to_generate, | ||
llm_model=args.llm_model | ||
batch_size=args.batch_size, | ||
num_criteria_to_generate=args.num_criteria_to_generate, | ||
llm_model=args.llm_model, | ||
) | ||
# Run the fit method | ||
asyncio.run(forger.fit(train_data)) | ||
except Exception as e: | ||
print(f"An error occurred: {e}") | ||
sys.exit(1) | ||
|
||
|
||
if __name__ == "__main__": | ||
forge() | ||
forge() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.