# config.cfg
[OpenAI]
# Optional. Set these if you are testing OpenAI models.
api_key =
# Optional. Set this if you are using an alternative OpenAI-compatible endpoint, such as ollama running locally.
#openai_compatible_url = http://localhost:8080/v1/
#openai_compatible_url = https://api.together.xyz
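# Illustrative local setup (a sketch, assuming an ollama server on its default
# port 11434; the key value is an arbitrary placeholder, since local
# OpenAI-compatible servers typically ignore it):
# api_key = ollama
# openai_compatible_url = http://localhost:11434/v1/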
[Huggingface]
# Optional. This allows the script to download gated models.
access_token =
# Optional. This is where models will be downloaded to.
# - defaults to ~/.cache/huggingface/hub
cache_dir =
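# Illustrative values (the token shown is a format placeholder, not a real
# token, and the cache path is hypothetical):
# access_token = hf_xxxxxxxxxxxxxxxxxxxx
# cache_dir = /mnt/models/hf_cache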
[Results upload]
# Optional. Set this to allow uploading of results to a Google Sheets spreadsheet.
# Note: this feature requires extra configuration (see README).
google_spreadsheet_url =
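# e.g. (placeholder ID shown; substitute your own sheet's URL):
# google_spreadsheet_url = https://docs.google.com/spreadsheets/d/<your-spreadsheet-id>/edit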
[Creative Writing Benchmark]
# judge_model_api: openai / mistralai / anthropic
judge_model_api =
judge_model =
judge_model_api_key =
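# Illustrative values (any judge model supported by the chosen API should
# work; the model name is an example and the key is a placeholder):
# judge_model_api = openai
# judge_model = gpt-4-0125-preview
# judge_model_api_key = sk-xxxxxxxxxxxxxxxx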
[Options]
# Set to true or false
trust_remote_code = true
[Oobabooga config]
# e.g. ~/text-generation-webui/start_linux.sh
ooba_launch_script =
# Specify any additional oobabooga launch params (this can be overridden on a per-model basis).
# e.g.:
# --auto-devices --loader llama.cpp
ooba_params_global =
# Set to true or false. Setting this to true is only supported on Linux (and possibly macOS).
# If set to false, you must launch ooba yourself with the --api flag and load the model yourself before running the benchmark.
# If you are launching ooba yourself, the model_path param will be ignored and all scheduled ooba benchmark runs will use the same model.
automatically_launch_ooba = true
# Ooba API request timeout in seconds (default 120). Set this higher if you expect long inference times.
ooba_request_timeout = 600
[Benchmarks to run]
# Define benchmarks in the following format:
# run_id, instruction_template, model_path, lora_path, quantization, n_iterations, inference_engine, ooba_params, downloader_filters
# Details:
#
# - run_id: A name to identify the benchmark run
# - instruction_template: The filename of the instruction template defining the prompt format, minus the .yaml extension (e.g. Alpaca)
# - model_path: Huggingface model ID, local path, or OpenAI model name
# - lora_path (optional): Path to a local LoRA adapter
# - quantization: Quantization via the bitsandbytes package (8bit, 4bit, or None)
# - n_iterations: Number of benchmark iterations (final score will be an average)
# - inference_engine: Set this to transformers, openai, ooba, or llama.cpp.
# - ooba_params (optional): Any additional ooba params for loading this model (overrides the global setting above)
# - downloader_filters (optional): Specify --include or --exclude patterns (using same syntax as huggingface-cli download)
# Examples:
#
# myrun1, openai_api, gpt-3.5-turbo, , , 1, openai, ,
# myrun2, Llama-v2, meta-llama/Llama-2-7b-chat-hf, /path/to/local/lora/adapter, 8bit, 3, transformers, , ,
# myrun3, Alpaca, ~/my_local_model, , None, 1, ooba, --loader transformers --n_ctx 1024 --n-gpu-layers -1,
# myrun4, Mistral, TheBloke/Mistral-7B-Instruct-v0.2-GGUF, , None, 1, ooba, --loader llama.cpp --n-gpu-layers -1 --tensor_split 1,3,5,7, --include ["*Q3_K_M.gguf", "*.json"]
# myrun5, Mistral, mistralai/Mistral-7B-Instruct-v0.2, , None, 1, ooba, --loader transformers --gpu-memory 12, --exclude "*.bin"
# myrun6, ChatML, model_name, , None, 1, llama.cpp, None,