# config.cfg
[OpenAI]
# Optional. Set these if you are testing OpenAI models.
api_key =
# Optional. Set this if you are using an alternative OpenAI-compatible endpoint, such as ollama running locally.
#openai_compatible_url = http://localhost:8080/v1/
#openai_compatible_url = https://api.together.xyz
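# Illustrative local setup (a sketch, assuming an ollama server on its default
# port 11434; the key value is an arbitrary placeholder, since local
# OpenAI-compatible servers typically ignore it):
# api_key = ollama
# openai_compatible_url = http://localhost:11434/v1/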
[Huggingface]
# Optional. This allows the script to download gated models.
access_token =
# Optional. This is where models will be downloaded to.
# - defaults to ~/.cache/huggingface/hub
cache_dir =
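# Illustrative values (the token shown is a format placeholder, not a real
# token, and the cache path is hypothetical):
# access_token = hf_xxxxxxxxxxxxxxxxxxxx
# cache_dir = /mnt/models/hf_cache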
[Results upload]
# Optional. Set this to allow uploading of results to a Google Sheets spreadsheet.
# Note: this feature requires extra configuration (see README).
google_spreadsheet_url =
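# e.g. (placeholder ID shown; substitute your own sheet's URL):
# google_spreadsheet_url = https://docs.google.com/spreadsheets/d/<your-spreadsheet-id>/edit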
[Creative Writing Benchmark]
# judge_model_api: openai / mistralai / anthropic
judge_model_api =
judge_model =
judge_model_api_key =
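# Illustrative values (any judge model supported by the chosen API should
# work; the model name is an example and the key is a placeholder):
# judge_model_api = openai
# judge_model = gpt-4-0125-preview
# judge_model_api_key = sk-xxxxxxxxxxxxxxxx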
[Options]
# Set to true or false
trust_remote_code = true
[Oobabooga config]
# e.g. ~/text-generation-webui/start_linux.sh
ooba_launch_script =
# Specify any additional oobabooga launch params (this can be overridden on a per-model basis).
# e.g.:
# --auto-devices --loader llama.cpp
ooba_params_global =
# Set to true or false. Setting this to true is only supported on Linux (and possibly macOS).
# If set to false, you must launch ooba yourself with the --api flag and load the model yourself before running the benchmark.
# If you are launching ooba yourself, the model_path param will be ignored and all scheduled ooba benchmark runs will use the same model.
automatically_launch_ooba = true
# Ooba API request timeout in seconds (default 120). Set this higher if you expect long inference times.
ooba_request_timeout = 600
[Benchmarks to run]
# Define benchmarks in the following format:
# run_id, instruction_template, model_path, lora_path, quantization, n_iterations, inference_engine, ooba_params, downloader_filters
# Details:
#
# - run_id: A name to identify the benchmark run
# - instruction_template: The filename of the instruction template defining the prompt format, minus the .yaml extension (e.g. Alpaca)
# - model_path: Huggingface model ID, local path, or OpenAI model name
# - lora_path (optional): Path to a local LoRA adapter
# - quantization: Quantization via the bitsandbytes package (8bit, 4bit, or None)
# - n_iterations: Number of benchmark iterations (final score will be an average)
# - inference_engine: Set this to transformers, openai, ooba, or llama.cpp.
# - ooba_params (optional): Any additional ooba params for loading this model (overrides the global setting above)
# - downloader_filters (optional): Specify --include or --exclude patterns (using same syntax as huggingface-cli download)
# Examples:
#
# myrun1, openai_api, gpt-3.5-turbo, , , 1, openai, ,
# myrun2, Llama-v2, meta-llama/Llama-2-7b-chat-hf, /path/to/local/lora/adapter, 8bit, 3, transformers, , ,
# myrun3, Alpaca, ~/my_local_model, , None, 1, ooba, --loader transformers --n_ctx 1024 --n-gpu-layers -1,
# myrun4, Mistral, TheBloke/Mistral-7B-Instruct-v0.2-GGUF, , None, 1, ooba, --loader llama.cpp --n-gpu-layers -1 --tensor_split 1,3,5,7, --include ["*Q3_K_M.gguf", "*.json"]
# myrun5, Mistral, mistralai/Mistral-7B-Instruct-v0.2, , None, 1, ooba, --loader transformers --gpu-memory 12, --exclude "*.bin"
# myrun6, ChatML, model_name, , None, 1, llama.cpp, None,